{“状态”:“确定”,“消息类型”:“工作”,“信息版本”:“1.0.0”,“邮件”:{“索引”:{“日期部分”:[[2024,6,5]],“日期时间”:“2024-06-05T14:57:45Z”,“时间戳”:1717599465664},“参考计数”:72,“出版商”:“电气与电子工程师学会(IEEE 1T00:00:00Z“,“timestamp”:1640995200000},“content-version”:“vor”,“delay-in-days”:0,“URL”:“https:\/\/ieeexplore.iee.org\/Xplorehelp\/downloads\/license-information\/ieee.html”},{“start”:{“date-parts”:[2022,1,1]],“date-time”:“2022-01-01T00:00Z”,“timetamp”:6409952000000},,“URL”:“https:\/\/doi.org\/10.15223\/policy-029”},{“start”:{“date-parts”:[[2022,1,1]],“date-time”:“2022-01-01T00:00:00Z”,“timestamp”:1640995200000},“content-version”:“stm-asf”,“delay-in-days”:0,“URL”:“http:\/\/doi.org\/10.15223\/policy-037”}],“content-domain”:{“domain”:[],“crossmark-restrict”离子“:false},”短容器时间“:[“IEEE\/ACM Trans.Audio Speech Lang.Process.”],“published-print”:{“date-parts”:[[2022]]},“DOI”:“10.1109\/taslp.2022.3169634”,“type”:“journal-article”,“created”:{“date-ports”:[2022,4,26]],“date-time”:“2022-04-26T19:37:08Z”,“timestamp”:1651001828000},”page:“1679-1693”,“source”:”Crossref“,”is-referenced-by-count“:5,”标题“:[“Zero-Shot Normalization Driven Multi-Speaker Text-to-Speech Synthesis”],“前缀”:“10.1109”,“卷”:“30”,“作者”:[{“ORCID”:“http://\/ORCID.org\/00000-0002-0485-392X”,“authenticated-ORCID”:false,“给定”:“Neeraj”,“family”:“Kumar”,“sequence”:“first”,“affiliation”:[}“name”:“印度新德里印度理工学院巴蒂电信技术与管理学院”}]},{“given”:“Ankur”,“family”:“Narang”,“sequence”:“additional”,“affiliation”:[{“name”:“印度德里新德里印度技术学院巴蒂通信技术与管理学校”}]{“ORCID”:“http:\/\/orcid.org/0000-0003-2677-3071”,“authenticated orcid”:false,“given”:“Brejesh”,“family”:“Lall”,“sequence”:“additional”,“affiliation”:[{“name”:“Bharti School of Telecommunication Technology and Management,Indian Institute of Technology Delhi,New Delhi,India”}],“member”:“263”,“reference”:[{“key”:“ref1”,“文章标题”:“CSTR VCTK语料库:CSTR语音克隆工具包的英语多语言语料库(0.92版)”,“author”:“Yamagishi”,“year”:“2019”},{“key”:“ref2”,“doi-asserted-by”:“publisher”,“doi”:“10.1109”,ICASSP.2015.7178964”}:“Prentice Hall Ser.Artif.Intell.”},{“key”:“ref4”,“volume-title”:“统计自然语言处理基础”,“author”:“Manning”,“year”:“1999”},{“键”:“参考7”,“doi-asserted-by”:“publisher”,“doi”:“10.1007\/978-3-540-48830-9_2”},{“key”:“ref8”,“doi-asserted-by”:”publisher“,”doi“:”10.21236\/ADA110902“},”{“key”:”ref9“,”volume-title“:”模式识别和机器学习“,”author“:”Bishop“,”year“:”2006“}”,{”key“:”ref10“,”doi-assert-by“:”publider“,”doi:“10.1126\/science.aaa8415”},{“key”:“ref11”,“doi-asserted-by”:“publisher”,“doi”:“10.1109\/ICASSP.2018.8461368”},{“key”:“ref12”,“article-title”:“Fastspeech:快速、稳健和可控的文本到语音转换”,“volume-title“:Proc.Adv.Neural Inf.Process.Syst.”,“author”:“Ren”,“year”:“2019”}:“Ren”,“year”:“2021”},{“key”:“ref14”,“article-title”:“通过语音循环为野生扬声器进行语音合成”,“author”:“Taigman”,“年份”:“2017”}2018年“},{“key”:“ref16”,“article-title”:“将学习从说话人验证转移到多说话人文本语音合成”,“volume-title“:”Proc.Adv.Neural Inf.Process.Syst.“,”author“:”Jia“,”year“:”2018“},{“key”:”ref17“,”doi-asserted-by“:”publisher“,”doi“:”10.21437\/Interspeech.2020-3139“}”,{”key“:”ref18“,”article-title“:“带少量样本的神经声音克隆”,“volume-title”:“Proc.Adv.Neural Inf.Process.Syst.”,“author”:“Arik”,“year”:“2018”},{“key”:“ref19”,“article-title“:”Deep voice 2:Multi-speaker Neural text-to-speake“,“volum-title:“Deep voice 3:2000 speaker neural text-to-speech”,“author”:“Ping”,“year”:“2017”},{“key”:“ref21”,“first page”:《4693》,“article-title”:“Towards end-to-prosody transfer for expressive speech synthesis with tacotron”,“volume-title“:”Proc.Int.Conf.Mach.Learn.“,”author“:”Skerry-Ryan“,”year“:”2018,“article-title”:“风格标记:端到端语音合成中的无监督风格建模、控制和传输”,“卷-时间”:“Proc.Int.Conf.Mach.Learn.”,“作者”:“王”,“年份”:“2018”},{“密钥”:“ref23”,“doi-asserted-by”:“publisher”,“doi”:“10.1109\/ICASSP.2019.8683623”}:“主题模型的自动编码变分推理”,“volume-title”:“Proc.ICLR”,“author”:“Srivastava”,“year”:“2017”},{“key”:“ref25”,“doi-asserted-by”:“publisher”,”doi“:”10.1109 \/ICSPSP4076.2020.9053520“}”,{”key“:”ref26“,”article-title“:”端对端对抗性文本到语音“,”author“:”Donahue“,”year“:”2021“},”{“密钥”:“ref26”27“,”文章标题“:“Adaspech:自定义语音的自适应文本到语音转换”,“author”:“Chen”,“year”:“2021”},{“key”:“ref28”,“doi-asserted-by”:“publisher”,“doi”:“10.1109”\/ICSPS39728.2021.9414872“}”,{(密钥):“ref29”,“doi-assertd-by”:“publisher”,“DI”:“10.“doi”:“10.21437\/intespeech.2021-329”},{“key”:“ref31”,“first page”:“12 449”,“article-title”:“wav2vec 2.0:语音表征自我监督学习的框架”,“volume-title“:“Proc.Adv.Neural Inf.Process.Syst.”,“author”:“Baevski”,“year”:“2020”},{“key”:,{“key”:“ref33”,“article-title”:“贴近人类素质的变压器TTS”,“author”:“Li”,“year”:“2018”},{“key”:”ref34“,”doi-asserted-by“:”publisher“,”doi“:”10.1109\/ICSPSP4076.2020.9054535“},”key“:”ref35“,”doi-assertd-by“:”publisher“,”DI:“10.1109\/ICASSP.2018.461375”}“,{”key:“ref36”,“doi-asserted-by”:“publisher”,“doi”:“10.21437\/Odyssey.2018-11”},{“key”:“ref37”,“doi-asserted-by”:“publisher”,“doi”:“10.18653\/v1\/k16-1002”}ch.2021-1774“},{“key”:“ref40”,“article-title”:“Glow-TTS:通过单调对齐搜索实现文本到语音的生成流”,“author”:“Kim”,“year”:“2020”},{“key”:“ref41”,“doi-asserted-by”:“publisher”,“doi”:“10.21437\/Intespeech.2020-2867”}、{“key”:”ref42“,”first page“:”933“,”article-title“:”用门控卷积网络进行语言建模“,”volume-title:“Proc.Int.Conf.Mach.Learn”,“author”:“Dauphin”,“year”:“2017”},{“key”:“ref43”,“doi-asserted-by”:“publisher”,”doi“:”10.21437\/interseech.2020-1064“},”{“密钥”:“ref44”,“article-title”:“Attention is all you need”,“volume-title“:”Proc.Adv.Neural Inf.Process.Syst.“,”author“:”Vaswani“,”year“:”2017“}”,{”key“:”ref45“,”doi-as serted-by“:”publisher“,”doi“:”10.1109\/ICASSP.2019.8683143“},{“key”:“ref46”,“article-title”:“Layer normalization”,“author”:“Ba”,“year”:“2016”}“:”publisher“,”doi“:“10.21437\/Interspeech.2017-1386”},{“key”:“ref50”,“doi-asserted-by”:“publisher”,“doi”:“10.1016\/S0167-6393(98)00085-5”}2016年“},{”关键“:”参考52“,“article-title”:“G2pE”,“volume-title“:“GitHub Repository”,“author”:“Park”,“year”:“2019”},{“key”:“ref53”,“doi-asserted-by”:“publisher”,“doi”:“10.1109\/TASLP.2016.593263”}由“:”publisher“,”doi“:”10.1109\/PACRIM.1993.407206“}插入,{”key“:“ref56”,“doi asserted by”:“publisher”,“doi”:“10.1121\/1.1458024”},{“key”:“ref57”,“首页”:“2232”,“文章标题”:“通过hessian特征值密度对神经网络优化的研究”,“卷标题”:“Proc.Int.Conf.Mach.Earn.”,“author”:“Ghorbani”,“year”:“2019”},{“key”:“ref58”,“doi asserted by”:“publisher”,“doi”:“10.1137\/16M1104974”},{“key”:“ref59”,“article-title”:“深度网黑森人的全谱规模:动态与SGD训练和样本量”,“author”:“Papyan”,“year”:“2019”}:“一些大规模矩阵计算问题”,“volume-title”:“J.Compute.Appl.Math.”,“volume”:“74”,“author”:“Bai”,“year”:“1996”},{“key”:”ref62“,”doi-asserted-by“:”publisher“,”doi“:”10.1007 \/BF02142693“},“key“:”ref63“,”first page“:、“volume-title”:“Ann.Numer.Math.”,“volume”:“4”,“author”:“Bai”,“year”:“1996”},{“key”:”ref64“,“doi-asserted-by”:“publisher”,“doi”:“10.1515\/9781400833887”}、{“密钥”:“ref65”,“首页”:“161”,”article-title“:”大规模学习的权衡“,”volume-title“:”Proc.Adv.Neural Inf.Process.Syst.“,”author“:”Bottou“,”year“:”2008“},{”key“:”ref66“,“doi-asserted-by”:“publisher”,“doi”:“10.1137 \/16M1080173”},{“key”:“ref67”,“article-title”:“网络深度对优化前景的影响”,“author”:“Ghorbani”,“year”:“2019”}“doi”:“10.1007\/978-1-4615-3122-7”},{“键”:“ref70”,“doi-asserted-by”:“publisher”,“doi”:“10.1007\/springerreference_64474”},{“key”:“ref71”,“doi-asserte-by”:“publisher”,“DI:”10.1090\/S0025-5718-97-00861-2“},”{“key”:”ref72“,”doi-assert-by“:”publisher“,”doi“:”10.1137\/130934283“}],“container-title”:[“IEEE\/ACM音频事务,语音和语言处理“],”原文标题“:[],“链接”:[{“URL”:“http://\/xplorestaging.ieee.org\/ielx7\/6570655\/9657755\/09763046.pdf?arnumber=9763046”,“内容类型”:“未指定”,“content-version”:“vor”,“intended-application”:“similarity-checking”}],“存放”:{“日期部分”:[2024,1,24]],“日期时间”:“2024-01-24T00:51:38Z”,“时间戳”:170605749800},“score”(分数):1,“resource”(资源):{“primary”(主资源):}“URL”(URL):“https://ieeexplore.ieee.org\/document\/9763046\/”}},“副标题”:[],“短标题”:[],“已发布”:{“日期部分”:[[2022]]},“引用计数”:72,“URL”:“http:\/\/dx.doi.org/10.1109\/taslp.20223169634”,“关系”:{},“ISSN”:[“2329-9290”,“2329-9304”],“ISSN类型”:[{“值”:“2329-9290”,“type”:“print”},{“value”:“2329-9304”,“type”:“electronic”}],“subject”:[],“published”:{“date-parts”:[[2022]]}}