{“状态”:“确定”,“消息类型”:“工作”,“信息版本”:“1.0.0”,“邮件”:{“索引”:{“日期-部件”:[[2024,9,23]],“日期-时间”:“2024-09-23T04:11Z”,“时间戳”:1727065151742},“引用-计数”:40,“发布者”:“IEEE”,“许可证”:[{“开始”:{-“日期-零件”:[2020,6,1]],”日期-时间“:”2020-06-01T00:00:00 Z“,”timestamp“:1590969600000},”content-version“:”vor“,“delay-in-days”:0,“URL”:“https:\/\/ieeexplore.iee.org\/Xplorehelp\/downloads\/license-information\/ieee.html”},{“start”:{“date-parts”:[[2020,6,1]],“date-time”:“2020-06-01T00:00:00Z”,“timestamp”:1590969600000},“content-version”:“stm-asf”,“delay-in-days“:0,”URL“https:\\/doi.org\/10.1 5223\/policy-029“},{“开始”:{“日期部分”:[[2020,6,1]],“date-time”:“2020-06-01T00:00:00Z”,“timestamp”:1590969600000},“content-version”:“stm-asf”,“delay-in-days”:0,“URL”:“https:\\/doi.org\/10.15223\/policy-037”}],“content-domain”:{“domain”:[],“crossmark-restriction”:false},”short-container-title“:[],”published-print“:{”date-parts“:[2020,6]},“doi”:“10.1109\/cvpr42600.2020.01248”,“类型”:“procesdings-article”,“created”:{“date-parts”:[[2020,8,5]],“date-time”:“2020-08-05T21:20:05Z”,“timestamp”:1596662405000},“page”:“12462-12471”,“source”:《Crossref》,“is-referenced-by-count”:32,“title”:[“Active Speakers in Context”],“prefix”:“10.1109”,“volume”:”124“,“author”:[{“给定”:“Juan Leon”,“family”:“Alcaza”r“,”sequence“:”first“,”affiliation“:[]},{“given”:“Fabian”,“family”:“Caba”,“sequence”:“additional”,“affiliation”:[]},{”given“:”Long“,”family“:”Mai“,”sequence“:”additional“,”affiliance“:[]neneneep,{(给定):“Federico”,“家族”:“Perazzi”,“序列”:“附加”,“从属关系”:[]},“givent”:“Joon-Young”,“家庭”:“Lee”,“sequence”,{“given”:“Pablo”,“family”:“Arbelaez”、“sequence”:“additional”,“affiliation”:[]},{“given”:“Bernard”,“family”:“Ghanem”,“sequences”:“附加”,“从属关系”:[]}],“member”:“263”,“reference”:[{“journal-title”:“多任务学习,用于视听主动说话人检测”,“year”:“0”,“author”:“yuan-hang”,“key”::“IEEE声学语音和信号处理国际会议(ICASSP)IEEE”,“文章标题”:“完全监督的说话人日记化”,“年份”:“0”,“作者”:“aonan”,“key”:“ref38”},{“journal-title”:“NeurIPS”,“article-title“:“Attention is all you need”,“year”:“2017”,“author”:“ashish”:“具有松散同步特征流的视觉语音识别”,“年份”:“2005”,“作者”:“kate”,“key”:“ref32”},{“日记标题”:“Ava-activespeaker用于主动说话人检测的视听数据集”,“年”:“2019”,“作家”:“约瑟夫”,“密钥”:“ref31”},“doi-asserted-by”:“publisher”,“doi”:“10.21437\/Interspeech.2018-2015”},{“journal-title”:“CVPR”,”article-title“:“详细视频理解的长期特征库”,“年份”:“2019”,“author”:“chao-yuan”,“key”:“ref36”}“},{“journal-title”:“通过扬声器条件谱图掩蔽进行语音滤波器目标语音分离”,“year”:“2018”,“author”:“quan”,“key”:“ref34”},{“日记标题”:“IEEE国际声学语音和信号处理会议(ICASSP)”,“article-title“:“完美匹配:改进视听同步的跨模态嵌入”,“year”:“0”,“author”:“soo-whan”,“key”:“ref10”},{“key“:“ref40”,“doi-asserted-by”:“crossref”,“first-page”:“409”,“doi”:“10.1007\/s11263-017-1033-7”,“article-title”:“揭示视频问答的时间上下文”,“volume”:”124“,”author“:”linchao“,”year“2017”,“journal-title“:”International journal of Computer Vision“},}”key“ref11”,“doi-asserted-by”:“publisher”,“doi”:“10.1109\/IME.2000.871073”},{“journal-title”:“CVPR”,《article-title》:“Imagenet:大型分层图像数据库”,“year”:“2009”,“author”:“jia”,”key“:”ref12“},”{“key”:“ref13”,”doi-assert-by“:”crossref“,”first page“545”,”doi“10.1016\/j.imavis.2008.04.018”,“物品标签”:“去掉电视视频中字符的自动命名”,“volume”:“27”,“author”:“mark”,“year”:“2009”,“journal-title”:“Image and Vision Computing”},{“journal title”:《CVPR》,“article-title》:“video action transformer network”,“year”:《2019》,“author”:“rohit”,“key”:《ref14》},}“jornal-tittle”::“通过学习不变映射降低维数”,“年份”:“2006”,“作者”:“raia”,“key”:“ref15”},{“journal-title”:“CVPR”,“文章标题”:“图像识别的深度剩余学习”,“年”:“2016”,“作家”:“kaiming”,“密钥”:“ref16”}“”:“批量规范化通过减少内部协变量转移加速深度网络训练”,“年份”:“2015”,“作者”:“ioffe”,“key”:“ref18”},{“key”:“ref19”,“doi断言”:“publisher”,“doi”:“10.1109\/TASLP.20192921890”},{“期刊标题”:“ECCV”,“文章标题”:“具有自我监督多感官功能的视听场景分析”,“年份”:“2018”,“author”:“owens”,“key”:“ref28”},{“journal-title”:“多模态交互国际会议(ICMI)”,“article-title“:“视听联合训练的主动说话人检测”,“year”:“0”,“auther”:“punarjay”,“key”:”ref4“},}“jornal-title:”ICML“,”article-title“:”多模态深度学习“,”year“:”2011“,”author“:”jiquan“,”key“:”ref27“}”,{“journal-title”:“多式互动国际会议(ICMI)”,“article-title(文章标题)”:“谁在讲话?视频中活跃演讲者的音频监督分类”,“year”:“0”,“author”:“punarjay”,“key”:“ref3”},{“日记标题”:“你说过吗?”,“年份”:“2017”,“author”:“joon son”,“key”:”ref6“},“journal-title“NeurIPS研讨会”,“文章标题”:“pytorch中的自动区分”,“year”:“0”,“author”:“adam”,“key”:“ref29”},{“journal title”:“Naver at activitynet challenge 2019 task b active speaker detection(ava)”,“year”:“2019”,“author”:“son chung”,“key”:“ref5”},{“journal title”:“CVPR”,“article title”:“野生唇读句子”,“year”:“2017”,“author”:“joon son”,“key”:“ref8”},{“journal-title”:“VoxCeleb2 Deep speaker recognition”,“year”:“2018”,“author”:“joon son”,“key”:“ref7”},{“日记标题”:“CVPR”,“article-title(文章标题)”:“Activitynet:人类活动理解的大规模视频基准”,“年”:“2015”,“作者”:“fabian caba”,“密钥”:“ref2”}:“过时:野外自动唇形同步”,“年份”:“2016”,“作者”:“chung”,“key”:“ref9”},{“journal-title”:“ECCV”,“article-title“:“时间动作检测器中的诊断错误”,“年”:“2018”,“作家”:“humam”,“密钥”:“ref1”}:“ref20”},{“journal-title”:“ICCV”,“article-title(文章标题)”:“视频人物再识别的全球时间表征”,“年份”:“2019”,“作者”:“建宁”,“密钥”:“ref22”}:“可学习的别针:人身份的跨模式嵌入”,“年份”:“2018”,“作者”:“arsha”,“密钥”:“ref24”},{“日志标题”:“NeurIPS”,“文章标题”:”使用森林查看树:与特征、对象和场景相关的图形模型“,”年份“:”2004“,”作者“:”kevin“,”密钥“:”ref23“},”日志标题“”:“Voxceleb a大规模说话人识别数据集”,“年份”:“2017年”,“作者”:“arsha”,“key”:“ref26”},{“新闻标题”:“CVPR”,”文章标题“:“看到的声音和听到的脸:跨模态生物特征匹配”,“年”:“2018年”,”作者“:”arsha“,”key“:”ref25“}],“事件”:{“名称”:“2020 IEEE\/CVF计算机视觉和模式识别会议(CVPR)”,“start”:{“date-parts”:[[2020,6,13]]},“location”:“Seattle,WA,USA”,“end”:{“date-parts”:[[2020,6,19]]}},”container-title“:[”2020 IEEE\/CVF计算机视觉和模式识别会议(CVPR)“],“original-title”:[],“link”:[{“URL”:“http://explorestaging.IEEE.org\/ielx7\/9142308\/9156271\/091157027.pdf?arnumber=9157027“,”内容类型“:“unspecified”,“content-version”:“vor”,“intended-application”:“similarity-checking”}],“deposed”:{“date-parts”:[[2022,6,27]],“date-time”:“2022-06-27T15:49:21Z”,“timestamp”:1656344961000},“score”:1,“resource”:{“primary”:“{”URL:“https:\/\/ieeexplore.iee.org\/document\/9157027\/”},”substitle“:[],”short标题“:[],”已发布“:{“日期部分”:[[2020,6]]},“references-count”:40,“URL”:“http://\/dx.doi.org\/10.109\/cvpr42600.2020.01248”,“relation”:{},“subject”:[],“published”:{“date-parts”:[[2020,6]]}}}