{“id”:“https://openalex.org/W4320559155“,”doi“:”https://doi.org/10.48550/arxiv.2302.05016“title”:“多模态视觉监控对语言有益吗?”?“,”display_name“:“多模态视觉监控对语言有益吗?“,”“publication_year”:2023,“publication_date”:“2023-02-10”,“ids”:{“openalex”:“https://openalex.org/W4320559155“,”doi“:”https://doi.org/10.48550/arxiv.2302.05016“},”语言“:”en“,”primary_location“:{”id“:”pmh:oai:arXiv.org:2302.05016“,”is_oa“:true,”landing_page_url“:”http://arxiv.org/abs/2302.05016,“pdf_url”:https://arxiv.org/pdf/2302.05016,“源”:{“id”:https://openalex.org/S4306400194“,”display_name“:”arXiv(康奈尔大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is.in_doaj“:false,”is_core“:fase,”host_organization“:”https://openalex.org/I205783295“,”“host_organization_name”:“康奈尔大学”,“host_organization_lineage”:[“https://openalex.org/I205783295“],”host_organization_lineage_names“:[],”type“:”repository“},”license“:”cc-by“,”licence_id“:”https://openalex.org/licenses/cc-by“,”“version”:“submittedVersion”,“is_accepted”:false,“is_published”:false,“raw_source_name”:“”,“raw _type”:null},“type”:“preprint”,“indexed_in”:[“arxiv”,“datacite”],“open_access”:{“is_oa”:true,“oa_status”:“green”,“oa_url”:“https://arxiv.org/pdf/2302.05016“,”any_repository_has_fulltext“:true},”authorships“:[{”author_position“:”第一“,”作者“:{”id“:”https://openalex.org/A5070457314“,”display_name“:”Avinash Madasu“,”orcid“:”https://orcid.org/0000-0002-3802-7618“},”机构“:[],”国家“:[[],”is_corresponding“:true,”raw_author_name“:”Madasu,Avinash“,”raw_affiliation_strings“:[],“从属关系”:[]},{“author_position”:“last”,“author”:{“id”:“https://openalex.org/A5087898808“,”display_name“:”Vasudev Lal“,”orcid“:”https://orcid.org/0000-0002-5907-9898“},”机构“:[],”国家“:[[],”is_corresponding“:false,”raw_author_name“:”Lal,Vasudev“,”raw _ afiliation_strings“:[],“附属机构”:[]}],“机构”:[]https://openalex.org/A5070457314“],”“corresponding_institution_ids”:[],“apc_list”:null,“apc _ paid”:null,“fwci”:null,“has_fulltext”:true,“cited_by_count”:0,“citation_normalized_percentile”:nul,“cited_by_percentile_year”:nuld,“biblio”:{“volume”:null,“issue”:nully,“first_page”:nulle},“is_retracted”:false,“is_parated”:false,“primary_topic”:{“id”:“https://openalex.org/T11714“,”display_name“:”多模态机器学习应用程序“,”score“:0.9998000264167786,”subfield“:{”id“:”https://openalex.org/subfields/1707“,”display_name“:”计算机视觉和模式识别“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”计算机科学“},”域“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},”主题“:[{”id“:”https://openalex.org/T11714“,”display_name“:”多模态机器学习应用程序“,”score“:0.9998000264167786,”subfield“:{”id“:”https://openalex.org/subfields/1707“,”display_name“:”计算机视觉和模式识别“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”计算机科学“},”域“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},{”id“:”https://openalex.org/T10028“,”display_name“:”Topic Modeling“,”score“:0.9932000041007996,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”计算机科学“},”域“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},{”id“:”https://openalex.org/T10181“,”display_name“:”自然语言处理技术“,”score“:0.9879999756813049,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”计算机科学“},”域“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}}],”关键词“:[{”id“:”https://openalex.org/keywords/computer science网站“,”display_name“:”计算机科学“,”score“:0.7892952561378479},{”id“:”https://openalex.org/keywords/natural-language-processing“,”display_name“:”自然语言处理“,”score“:0.5936502218246},{”id“:”https://openalex.org/keywords/natural-language网站“,”display_name“:”自然语言“,”score“:0.5890165567398071},{”id“:”https://openalex.org/keywords/artificial-intelligence网站“,”display_name“:”人工智能“,”score“:0.5829882025718689},{”id“:”https://openalex.org/keywords/question-answering“,”display_name“:”问答“,”score“:0.5658835768699646},{”id“:”https://openalex.org/keywords/set“,”display_name“:”集合(抽象数据类型)“,”分数“:0.5608761310577393},{”id“:”https://openalex.org/keywords/modyne“,”display_name“:”Modality(human\u2013 computer interaction)“,”score“:0.5394983291625977},{”id“:”https://openalex.org/keywords/language-model网站“,”display_name“:”语言模型“,”score“:0.5344281196594238},{”id“:”https://openalex.org/keywords/编码器“,”display_name“:”编码器“,”score“:0.5163429975509644},{”id“:”https://openalex.org/keywords/commonsense-reasoning“,”display_name“:”常识推理“,”score“:0.4288976192474365},{”id“:”https://openalex.org/keywords/language-understanding“,”display_name“:”语言理解“,”score“:0.4210517108440399},{”id“:”https://openalex.org/keywords/modifies(https://openalex.org/关键词/模式)“,”display_name“:”Modalities“,”score“:0.4166988134384155},{”id“:”https://openalex.org/keywords/programming-language网站“,”display_name“:”编程语言“,”score“:0.08173766732215881}],”概念“:[{”id“:”https://openalex.org/C41008148,“wikidata”:https://www.wikidata.org/wiki/Q21198“,”display_name“:”计算机科学“,”level“:0,”score“:0.7892952561378479},{”id“:”https://openalex.org/C204321447,“wikidata”:https://www.wikidata.org/wiki/Q30642“,”display_name“:”自然语言处理“,”level“:1,”score“:0.5936502218246},{”id“:”https://openalex.org/C195324797,“wikidata”:https://www.wikidata.org/wiki/Q33742“,”display_name“:”自然语言“,”level“:2,”score“:0.5890165567398071},{”id“:”https://openalex.org/C154945302,“wikidata”:https://www.wikidata.org/wiki/Q11660“,”display_name“:”人工智能“,”level“:1,”score“:0.5829882025718689},{”id“:”https://openalex.org/C44291984,“wikidata”:https://www.wikidata.org/wiki/Q1074173“,”display_name“:”问答“,”等级“:2,”分数“:0.5658835768699646},{”id“:”https://openalex.org/C177264268,“wikidata”:https://www.wikidata.org/wiki/Q1514741“,”display_name“:”Set(abstract data type)“,”level“:2,”score“:0.56087613105773939},{”id“:”https://openalex.org/C2780226545,“wikidata”:https://www.wikidata.org/wiki/Q6888030“,”display_name“:”Modality(human\u2013 computer interaction)“,”level“:2,”score“:0.5394983291625977},{”id“:”https://openalex.org/C137293760,“wikidata”:https://www.wikidata.org/wiki/Q3621696“,”display_name“:”语言模型“,”level“:2,”score“:0.5344281196594238},{”id“:”https://openalex.org/C118505674,“wikidata”:https://www.wikidata.org/wiki/Q42586063“,”display_name“:”编码器“,”level“:2,”score“:0.5163429975509644},{”id“:”https://openalex.org/C193221554,“wikidata”:https://www.wikidata.org/wiki/Q5153664“,”display_name“:”常识推理“,”level“:2,”score“:0.4288976192474365},{”id“:”https://openalex.org/C2983448237,“wikidata”:https://www.wikidata.org/wiki/Q1078276“,”display_name“:”语言理解“,”level“:2,”score“:0.4210517108440399},{”id“:”https://openalex.org/C2779903281,“wikidata”:https://www.wikidata.org/wiki/Q6888026“,”display_name“:”Modalities“,”level“:2,”score“:0.4166988134384155},{”id“:”https://openalex.org/C199360897,“wikidata”:https://www.wikidata.org/wiki/Q9143“,”display_name“:”编程语言“,”level“:1,”score“:0.08173766732215881},{”id“:”https://openalex.org/C111919701,“wikidata”:https://www.wikidata.org/wiki/Q9135“,”display_name“:”操作系统“,”level“:1,”score“:0.0},{”id“:”https://openalex.org/C144024400,“wikidata”:https://www.wikidata.org/wiki/Q21201“,”display_name“:”社会学“,”level“:0,”score“:0.0},{”id“:”https://openalex.org/C36289849,“wikidata”:https://www.wikidata.org/wiki/Q34749“,”display_name“:”社会科学“,”level“:1,”score“:0.0}],”mesh“:[],”locations_count“:2,”location“:[{”id“:”pmh:oai:arXiv.org:2302.0016“,”is_oa“:true,”landing_page_url“:”http://arxiv.org/abs/2302.05016,“pdf_url”:https://arxiv.org/pdf/2302.05016,“源”:{“id”:https://openalex.org/S4306400194“,”display_name“:”arXiv(康奈尔大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is.in_doaj“:false,”is_core“:fase,”host_organization“:”https://openalex.org/I205783295“,”“host_organization_name”:“康奈尔大学”,“host_organization_lineage”:[“https://openalex.org/I205783295“],”host_organization_lineage_names“:[],”type“:”repository“},”license“:”cc-by“,”licence_id“:”https://openalex.org/licenses/cc-by“,”“version”:“submittedVersion”,“is_accepted”:false,“is_published”:false,“raw_source_name”:“”,“raw类型”:null},{“id”:“doi:10.48550/arxiv.2302.0016”,“is _ oa”:true,“landing_page_url”:“https://doi.org/10.48550/arxiv.2302.05016“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S4306400194“,”display_name“:”arXiv(康奈尔大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is.in_doaj“:false,”is_core“:fase,”host_organization“:”https://openalex.org/I205783295“,”“host_organization_name”:“康奈尔大学”,“host_organization_lineage”:[“https://openalex.org/I205783295“],”host_organization_lineage_names“:[],”type“:”repository“},”license“:”cc-by“,”licence_id“:”https://openalex.org/licenses/cc-by“,”version“:null,”is_accepted“:false,”is_published“:nul,”raw_source_name“:null,”raw _type“:”article“}],”best_oa_location“:{”id“:”pmh:oai:arXiv.org:2302.05016“,”is_oa“:true,”landing_page_url“:”http://arxiv.org/abs/2302.05016,“pdf_url”:https://arxiv.org/pdf/2302.05016,“源”:{“id”:https://openalex.org/S4306400194“,”display_name“:”arXiv(康奈尔大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is.in_doaj“:false,”is_core“:fase,”host_organization“:”https://openalex.org/I205783295“,”“host_organization_name”:“康奈尔大学”,“host_organization_lineage”:[“https://openalex.org/I205783295“],”host_organization_lineage_names“:[],”type“:”repository“},”license“:”cc-by“,”licence_id“:”https://openalex.org/licenses/cc-by“,”version“:”submittedVersion“,”is_accepted“:false,”is_published“:false,”raw_source_name“:”“,”raw_type“:null},”sustainable_development_goals“:[{”display_name“:”素质教育“,”score“:0.8500000238418579,”id“https://metadata.un.org/sdg/4“}],”奖项“:[],”资助者“:[[],”has_content“:{”pdf“:true,”grobid_xml“:true},”content_urls“:{pdf”:“https://content.openalex.org/works/W4320559155.pdf“,”grobid_xml“:”https://content.openalex.org/works/W4320559155.grobid-xml“},”referenced_works_count“:0,”reforenced_works“:[],”related_works“:[”https://openalex.org/W4313191056","https://openalex.org/W4383337770","https://openalex.org/W4288267738","https://openalex.org/W4225546813","https://openalex.org/W4388937922","https://openalex.org/W3157284875","https://openalex.org/W2259406085","https://openalex.org/W2099715052","https://openalex.org/W3113264705","https://openalex.org/W2964413124“],”abstract_inverted_index“:{”Vision“:[0],”(image“:[1],”and“:[2,35,66,84],”video)“:[3],”-“:[4],”Language“:[5,64],”VL“:[6],”prepretermining“:[7],”is“:[8],”the“:[9,39,94107130137141],”recent“:[10],”popular“:[11],”paradigm“:[12],”that“:[13120],”reactived“:[14],”最先进“:[15],”结果“:[16133],”开启“:[17,62127136],”多模“:[18],”tasks“:[19],”like“:[20,87],”image-retrieval“,:[21],”video-retrieval,“:[22],”visual“:[23],”question“:[24],”answering“:[25],”etc.“:[26],”These“:[27132],”models“:[28,78,86105],”are“:[2],”trained“:[30,52],”in“:[31],”an“:[32],“unpervisived”:[33],”way“:[34],”极大”:[36],“利益”:[37],“来自”:[38],“互补”:[40],“形式”:[41],“监督。“:[42116],”In“:[43],”this“:[44],”paper“:[45],”we“:%46],”explore“:[47],”if“:48],”language“:[50,60,97108122],”representation“:[51,61,98109123],”using“:[53],”vision“:[54115],”supervision“:[55],”performance“:[56],”better“:[57],”than“:[58],”vanilla“:[59121],”Natural“理解“:[65],”常识“:[67],”推理“:[68],”基准。“:[69],“我们”:[70,92],“实验”:[71],“with”:[72],“a”:[73],“diversity”:[74],“set”:[75],“of”:[76,96,99103110129140],“image-text”:[77],“such”:[79],“as”:[80],“ALBEF”:[81],“BLIP”:[82],“METER”:[83],“video-text”:【85】,“ALPRO”,“:【88】,“时间冻结”:[89],“(FiT),”:[90],“暴力。“:[91],”比较“:[93],”性能“:[95126],”独立“:[100],”文本“:[10111],”编码器“:[102112],”这些“:[104],”到“:[106],”学习“:[113],”通过“:[114],”我们的“:[117],”实验“:[118],”建议“:[119],”显示“:[124],”高级“:[125],”大多数“:[128],”任务。“:[131],”棚“:[134],”光“:[135],”当前“:[138],”缺点“:[139],”视觉语言“:[142],”模型。“:[143]},”counts_by_year“:[],”updated_date“:”2025-11-06T06:51:31.235846“,”created_date:“2025-10-10T00:00:00”}“