{“id”:“https://openalex.org/W4386436265“,”doi“:”https://doi.org/10.48550/arxiv.2309.00424“,”title“:”Learning Speech Representation From Contrastive Token-Acoustic Pretraining“,”display_name“:”Learning Speach Represent From Contractive Toke-Acoustic Pretraining“,”publication_year“:2023,”publitation_date“:”2023-01-01“,”ids“:{”openalex“:”https://openalex.org/W4386436265“,”doi“:”https://doi.org/10.48550/arxiv.2309.00424“},”language“:”en“,”primary_location“:{”is_oa“:true,”landing_page_url“:”https://arxiv.org/abs/2309.00424“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S4306400194“,”display_name“:”arXiv(康奈尔大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is_ in_doaj“:false,”host_organization“:”https://openalex.org/I205783295“,”host_organization_name“:”康奈尔大学“,”host_organization_lineage“:[”https://openalex.org/I205783295“],”host_organization_lineage_names“:[”康奈尔大学“],“type”:“repository”},“license”:“other-oa”,“licence_id”:“https://openalex.org/licenses/other-oa网站“,”version“:”submittedVersion“,”is_accepted“:false,”is_published“:false},”type“:”preprint“,”type_crossref“:“posted-content”,”indexed_in“:[”arxiv“,”datacite“],”open_access“:{”is_oa“:true,”oa_status“:”green“,”oa_url“:”https://arxiv.org/abs/2309.00424“,”any_repository_has_fulltext“:true},”authorships“:[{”author_position“:”第一“,”作者“:{”id“:”https://openalex.org/A5028353824“,”display_name“:”Chunyu Qiang“,”orcid“:null},”institutions“:[],”countries“:[],”is_corresponding“:false,”raw_author_name“:”Qiang,Chunyue“,”raw _affiliation_strings“:],”afliations“:【】},{”author_position“:”middle“,”author“:{”id“:”https://openalex.org/A5019560977“,”display_name“:”郝丽“,”兽人“:”https://orcid.org/0000-0002-6294-6761“},”机构“:[],”国家“:[[],”is_corresponding“:false,”raw_author_name“:”李,郝“,”raw_affiliation_strings“:[],“隶属关系”:[]},{“author_position”:“middle”,“author”:{“id”:“https://openalex.org/A5001892611“,”display_name“:”田永超“,”orcid“:”https://orcid.org/0000-0001-9611-8967“},”机构“:[],”国家“:[[],”is_corresponding“:false,”raw_author_name“:”田一新“,”raw _ afiliation_strings“:[],“隶属关系”:[]},{“author_position”:“middle”,“author”:{“id”:“https://openalex.org/A5073918837“,”display_name“:”瑞波福“,”兽人“:”https://orcid.org/0000-0001-9598-1881“},”机构“:[],”国家“:[[],”is_corresponding“:false,”raw_author_name“:”傅瑞波“,”raw _ afiliation_strings“:[],“从属关系”:[]},{“author_position”:“middle”,“author”:{“id”:“https://openalex.org/A5085142676“,”display_name“:”Tao Wang“,”orcid“:”https://orcid.org/0000-0003-0951-5476“},”机构“:[],”国家“:[[],”is_corresponding“:false,”raw_author_name“:Wang,Tao”,“raw_affiliation_strings”:[],“affiliations”:[]},{“author_position”:“middle”,“author”:{“id”:“https://openalex.org/A5050763764“,”display_name“:”Longbiao Wang“,”orcid“:”https://orcid.org/0000-0002-4005-5036“},”机构“:[],”国家“:[[],”is_corresponding“:false,”raw_author_name“:”王,龙彪“,”raw_affiliation_strings“:[],“隶属关系”:[]},{“author_position”:“last”,“author”:{“id”:“https://openalex.org/A5017251198“,”display_name“:”健武党“,”兽人“:”https://orcid.org/0000-0002-9237-4821“},”机构“:[],”国家“:[],”is_corresponding“:false,”raw_author_name“:”党,建武“,”raw_affiation_strings“:[],”附属机构“:[]}],”国家_地区_计数“:0,”机构_地区_计数“:0,”对应机构_授权人“:[],”对应机构_ ID“:[],”apc_list“:null,”apc_payed“:null,”has_fulltext“:false,”cited_by_count“:0,”cited_by_percentile_year“:{“min”:0,“max”:78},“biblio”:{卷:null,“问题”:nullhttps://openalex.org/T10201“,”display_name“:”语音识别技术“,”score“:0.9991,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},”主题“:[{”id“:”https://openalex.org/T10201“,”display_name“:”语音识别技术“,”score“:0.9991,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},{”id“:”https://openalex.org/T11309“,”display_name“:”音频信号分类与分析“,”score“:0.9976,”subfield“:{”id“:”https://openalex.org/subfields/1711“,”display_name“:”信号处理“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},{”id“:”https://openalex.org/T10860“,”display_name“:”语音增强技术“,”score“:0.9953,”subfield“:{”id“:”https://openalex.org/subfields/1711“,”display_name“:”信号处理“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}}],”关键词“:[{”id“:”https://openalex.org/keywords/audio-visual-speech-recognition(https://openalex.org/keywords/audio-visual-speech-recognition)“,”display_name“:”视听语音识别“,”score“:0.618658},{”id“:”https://openalex.org/keywords/end-to-end-speech-recognition(https://openalex.org/关键词/端到端语音识别)“,”display_name“:”端到端语音识别“,”score“:0.573947},{”id“:”https://openalex.org/keywords/automatic-speech-re认知“,”display_name“:”自动语音识别“,”score“:0.57114},{”id“:”https://openalex.org/keywords/aoustic-modeling“,”display_name“:”声学建模“,”score“:0.569167},{”id“:”https://openalex.org/keywords/speech-enhancement网站“,”display_name“:”语音增强“,”score“:0.561046}],”concepts“:[{”id“:”https://openalex.org/C41008148,“wikidata”:https://www.wikidata.org/wiki/Q21198(网址:https://www.wikidata.org/wiki/Q21198)“,”display_name“:”计算机科学“,”level“:0,”score“:0.8065063},{”id“:”https://openalex.org/C28490314,“wikidata”:https://www.wikidata.org/wiki/Q189436“,”display_name“:”语音识别“,”level“:1,”score“:0.70676607},{”id“:”https://openalex.org/C48145219,“wikidata”:https://www.wikidata.org/wiki/Q1335365“,”display_name“:”安全令牌“,”level“:2,”score“:0.641878},{”id“:”https://openalex.org/C157968479,“wikidata”:https://www.wikidata.org/wiki/Q3079876“,”display_name“:”音频挖掘“,”level“:4,”score“:0.5973996},{”id“:”https://openalex.org/C133378560,“wikidata”:https://www.wikidata.org/wiki/Q1753225“,”display_name“:”Paralanguage“,”level“:2,”score“:0.4897249},{”id“:”https://openalex.org/C54953205,“wikidata”:https://www.wikidata.org/wiki/Q4142201“,”display_name“:”语音分析“,”level“:4,”score“:0.43887186},{”id“:”https://openalex.org/C204321447,“wikidata”:https://www.wikidata.org/wiki/Q30642“,”display_name“:”自然语言处理“,”level“:1,”score“:0.42470905},{”id“:”https://openalex.org/C154945302,“wikidata”:https://www.wikidata.org/wiki/Q11660“,”display_name“:”人工智能“,”level“:1,”score“:0.40562576},{”id“:”https://openalex.org/C61328038,“wikidata”:https://www.wikidata.org/wiki/Q3358061“,”display_name“:”语音处理“,”level“:2,”score“:0.36885384},{”id“:”https://openalex.org/C155635449,“wikidata”:https://www.wikidata.org/wiki/Q4674699“,”display_name“:”声学模型“,”level“:3,”score“:0.3588407},{”id“:”https://openalex.org/C41895202,“wikidata”:https://www.wikidata.org/wiki/Q8162“,”display_name“:”语言学“,”level“:1,”score“:0.08500445},{”id“:”https://openalex.org/C138885662,“wikidata”:https://www.wikidata.org/wiki/Q5891“,”display_name“:”哲学“,”等级“:0,”分数“:0.0},{”id“:”https://openalex.org/C38652104,“wikidata”:https://www.wikidata.org/wiki/Q3510521“,”display_name“:”Computer security“,”level“:1,”score“:0.0}],”mesh“:[],”locations_count“:2,”locations“:[{”is_oa“:true,”landing_page_url“:”https://arxiv.org/abs/2309.00424“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S4306400194“,”display_name“:”arXiv(康奈尔大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is_ in_doaj“:false,”host_organization“:”https://openalex.org/I205783295“,”host_organization_name“:”康奈尔大学“,”host_organization_lineage“:[”https://openalex.org/I205783295“],”host_organization_lineage_names“:[”康奈尔大学“],“type”:“repository”},“license”:“other-oa”,“licence_id”:“https://openalex.org/licenses/other-oa“,”version“:”submittedVersion“,”is_accepted“:false,”is_published“:false},{”is_oa“:false,”landing_page_url“:”https://api.datacite.org/dois/10.48550/arxiv.2309.00424“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S4393179698“,”display_name“:”DataCite API“,”issn_l“:null,”issn“:null,”is_oa“:true,”is_ in_doaj“:false,”host_organization“:”https://openalex.org/I4210145204“,”“host_organization_name”:“DataCite”,“host_organization_lineage”:[“https://openalex.org/I4210145204“],”host_organization_lineage_names“:[”DataCite“],”type“:”metadata“},”license“:null,”license_id“:null,”version“:null}],”best_oa_location“:{”is_oa“:true,”landing_page_url“:”https://arxiv.org/abs/2309.00424“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S4306400194“,”display_name“:”arXiv(康奈尔大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is_ in_doaj“:false,”host_organization“:”https://openalex.org/I205783295“,”host_organization_name“:”康奈尔大学“,”host_organization_lineage“:[”https://openalex.org/I205783295“],”host_organization_lineage_names“:[”康奈尔大学“],“type”:“repository”},“license”:“other-oa”,“licence_id”:“https://openalex.org/licenses/other-oa“,”版本“:”提交版本“,”is_accepted“:false,”is_published“:false},”可持续发展目标“:[{”display_name“:”优质教育“,”id“:”https://metadata.un.org/sdg/4“,”score“:0.61}],”grants“:[],”datasets“:【】,”versions“:【],”referenced_works_count“:0,”referrenced_works“:[],”related_work“:[”https://openalex.org/W4321847012","https://openalex.org/W3151376046","https://openalex.org/W3089379469","https://openalex.org/W2964829415","https://openalex.org/W2903652364","https://openalex.org/W2619911963","https://openalex.org/W2157598242","https://openalex.org/W2129389759","https://openalex.org/W2066339445","https://openalex.org/W1799027130“],”ngrams_url“:”https://api.openalex.org/works/W4386436265/ngrams网站“,”“abstract_inverted_index”:{“For”:[0],“fine-grained”:[1,64184],“generation”:[2185],“and”:[3,14,32,53,75117140152166173186],“recognition”:[4,17187],“tasks”:[5189],“such”:[6,49],“as”:[7,50],“minimally-supervised”:[8170],“text-to-speech”:[9],“(TTS),”:[10],“voice”:[11],“conversion”:[12],“(VC),”:[13],“自动”:[15],“语音“:[16,24,68141153165191],”(ASR),“:[18],”the“:[19,46,97155],”intermediate“:[20,65,86],”representations“:[21,66,87],”extracted“:[22],”from“:[23,37,67,70,88],”should“:[25,56],”serve“:[26],”a“:[28,81126143180195],”bridge\“:[29],”between“:[30],”text“:[31],”声学“:[33,54],”信息“:[34],”包含“:[35],”信息“:[36,48105],”两者“:[38],”模式。“:[39,90],”The“:[40158175],”semantic“:[41],”content“:%42],”is“:[43,80161],”impressified“,”:[44],“while”:[45],”paralingualic“:[47],”speaker“:[51],”identity“:[52],”details“:[55],”be“:[57],”de emphasized“。“:[58],”“然而,”:[59,91],“现有”:[60,92],“方法”:[61,95],“用于”:[62,84106114183],“提取”:[63102],“遭受”:[69],“问题”:[71],“的”:[72],“过度”:[73],“冗余”:[74],“维度”:[76],“爆炸”。“:[77],“对比”:[78],“学习”:[79,94147],“好”:[82],“方法”:[83127178],“建模”:[85],“两个”:[89135],“对照”:[93],“in”:[96190],“音频”:[98108198],“字段”:[99],“焦点”:[100],“on”:[101163],“全局”:[103],“描述性”:[104],“下游”:[107188],“分类”:[109],“任务”:[110],“制作”:[111],“它们”:[112],“不适合“:[113],”TTS,“:[115171],”VC,“:116172],”ASR“:[118],”任务。“:[119],“收件人”:[120],“地址”:[121],“这些”:[122],“问题”,:[123],“我们”:[124],“提议”:[125],“命名”:[128],“对比”:[129],“代词声学”:[130],“预训练”:[131],“(CTAP)”,“:[132],“其中”:[133],“使用”:[134],“编码器”:[136],“到”:[137149],“带来”:[138],“音素”:[139151167],“into”:[142],“joint”:[144],“multimal”:[145],“space”,:[146],“how“:[148],”connect“:[150],”at“:[154],”frame“:[156],”level。“:[157],”CTAP“:[159177],”model“:[160],”trained“:[162],”210k“:[164],”pairs,“:[168],”reacing“:[169],”ASR。“:[174],”提议的“:[176],”提议的“:[179],”有希望的“:[181],”解决方案“:[182],”处理。“:[192],”我们“:[193],”提供“:[194],”网站“:[196],”带有“:[197],”样本。“:[199]},”cited_by_api_url“:”https://api.openalex.org/works?filter=cites:W4386436265“,”counts_by_year“:[],”updated_date“:”2024-06-14T21:25:22.984446“,”created_date:“2023-09-05”}“