{“id”:“https://openalex.org/W4322747033“,”doi“:”https://doi.org/10.1007/978-3-031-26316-3_39“,”title“:“用于高性能基于文本的可视问题解答的两阶段多模态融合”,“display_name”:“用于基于文本的高性能可视问题答复的两阶段多模态融合”、“publication_year”:2023,“publication_date”:“2023-01-01”,“ids”:{“openalex”:“https://openalex.org/W4322747033“,”doi“:”https://doi.org/10.1007/978-3-031-26316-3_39“},”language“:”en“,”primary_location“:{”is_oa“:false,”landing_page_url“:”https://doi.org/10.1007/978-3-031-26316-3_39“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S106296714“,”“display_name”“:”“计算机科学课堂讲稿”“,”issn_l“:”0302-9743“,”isn“:[”0302-7743“、”1611-3349“],”is_oa“:false,”is_in_doaj“:false,”host_organization“:”https://openalex.org/P4310318900“,”“host_organization_name”:“Springer Science+Business Media”,“host_organization_lineage”:[“https://openalex.org/P431031965","https://openalex.org/P4310318900“],”host_organization_lineage_names“:[”Springer Nature“,”Springer-Science+Business Media“],“type”:“book-series”},“license”:null,“licence_id”:nul,“version”:null,“is_accepted”:false,“is_published”:false},”type“:”book-chapter“,”type_crossref“:“book-chapter”,”indexed_in“:[“crossref”],”open_access“:{”is_oa“:false”“,”oa_url“:空,”any_repository_has_fulltext“:false},”作者身份“:[{”作者位置“:”第一个“,”作者“:{”id“:”https://openalex.org/A5028461627“,”display_name“:”Bingjia Li“,”orcid“:null},”institutions“:[{”id“:”https://openalex.org/I24943067“,”display_name“:”复旦大学“,”ror“:”https://ror.org/013q1eq08“,”country_code“:”CN“,”type“:“教育”,”世系“:[”https://openalex.org/I24943067“]}],”国家“:[”CN“],”is_corresponding“:false,”raw_author_name“:”Bingjia Li“,”rau_affiliation_strings“:[“上海智能信息处理重点实验室,复旦大学计算机科学学院,上海,200438”]},{“author_position”:“middle”,“author”:{“id”:“https://openalex.org/A5075877965“,”display_name“:”Jie Wang“,”orcid“:”https://orcid.org/0000-0002-3023-804“},”机构“:[],”国家“:[”CN“],”is_corresponding“:false,”raw_author_name“:”Jie Wang“,”raw关联字符串“:[“ByteDance,中国上海”]},{“author_position”:“middle”,“author”:{“id”:“https://openalex.org/A5045420812“,”display_name“:”Minyi Zhao“,”orcid“:”https://orcid.org/0000-0001-7720-806X},“机构”:[{“id”:https://openalex.org/I24943067“,”display_name“:”复旦大学“,”ror“:”https://ror.org/013q1eq08“,”country_code“:”CN“,”type“:“教育”,”世系“:[”https://openalex.org/I24943067“]}],”国家“:[”CN“],”is_corresponding“:false,”raw_author_name“:”Minyi Zhao“,”raw _affiliation_strings“:[“上海智能信息处理重点实验室,复旦大学计算机科学学院,上海,200438”]},{“author_position”:“last”,“author”:{“id”:“https://openalex.org/A5017862559“,”display_name“:”水耕洲“,”兽人“:”https://orcid.org/0000-0002-1949-2768},“机构”:[{“id”:https://openalex.org/I24943067“,”display_name“:”复旦大学“,”ror“:”https://ror.org/013q1eq08“,”country_code“:”CN“,”type“:“教育”,”世系“:[”https://openalex.org/I24943067“]}],”国家“:[”CN“],”is_corresponding“:false,”raw_author_name“:”周水耕“,”raw _ afiliation_strings“:[“上海智能信息处理重点实验室,复旦大学计算机科学学院,上海,200438”]}]“countries _ distinct_count”:1,”institutions _ disting_count“:1,“corresponding_author_ids”:[],“corresponding_institution_ids“:[],”apc_list“:{“value”:5000,”currency“:”EUR“,”value_usd“:5392,”provenance“:”doaj“},”apc _payed“:”value“:5000,“currench”:“EUR”,”value_ usd“:5392,“provenance”:“doaj”},“has_fulltext”:false,“cited_by_count”:1,“ciped_by_percentile_year”:{”min“:78,“max”:87},biblio“:{”volume“:null,”issue“:nul,”first_page“:”658“,”last_page“:”674“},”is_retracted“:false,”is_paratext“:fase,”primary_topic“:{”id“:”https://openalex.org/T11714“,”“display_name”:“图像和视频中的可视问答”,“score”:1.0,“subfield”:{“id”:“https://openalex.org/subfields/1707“,”display_name“:”计算机视觉和模式识别“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},”主题“:[{”id“:”https://openalex.org/T11714“,”“display_name”:“图像和视频中的可视问答”,“score”:1.0,“subfield”:{“id”:“https://openalex.org/subfields/1707“,”display_name“:”计算机视觉和模式识别“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},{”id“:”https://openalex.org/T10627“,”display_name“:”图像特征检索和识别技术“,”score“:0.9984,”subfield“:{”id“:”https://openalex.org/subfields/1707“,”display_name“:”计算机视觉和模式识别“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},{”id“:”https://openalex.org/T11307“,”display_name“:”转移学习和领域适应的进展“,”score“:0.9926,”subfield“:{”id“:”https://openalex.org/subfields/s702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}}],”关键词“:[{”id“:”https://openalex.org/keywords/visual-question-answering“,”display_name“:”可视问答“,”score“:0.62861},{”id“:”https://openalex.org/keywords/multimal-fusion(多模态融合)“,”display_name“:”Multimodal Fusion“,”score“:0.583292},{”id“:”https://openalex.org/keywords/image-captioning网站“,”display_name“:”图像字幕“,”score“:0.533926},{”id“:”https://openalex.org/keywords/visual-recognition网站“,”display_name“:”视觉识别“,”score“:0.503097}],”concepts“:[{”id“:”https://openalex.org/C41008148,“wikidata”:https://www.wikidata.org/wiki/Q21198“,”display_name“:”计算机科学“,”level“:0,”score“:0.8851905},{”id“:”https://openalex.org/C44291984,“wikidata”:https://www.wikidata.org/wiki/Q1074173网址“,”“display_name”“:”“问答”“,”级别“:2,”分数“:0.6344458},{”id“:”https://openalex.org/C154945302,“wikidata”:https://www.wikidata.org/wiki/Q11660“,”display_name“:”人工智能“,”level“:1,”score“:0.61903214},{”id“:”https://openalex.org/C153083717,“wikidata”:https://www.wikidata.org/wiki/Q6535263“,”display_name“:”Leverage(statistics)“,”level“:2,”score“:0.6001332},{”id“:”https://openalex.org/C2779903281,“wikidata”:https://www.wikidata.org/wiki/Q6888026“,”display_name“:”Modalities“,”level“:2,”score“:0.5422569},{”id“:”https://openalex.org/C204321447,“wikidata”:https://www.wikidata.org/wiki/Q30642“,”display_name“:”自然语言处理“,”level“:1,”score“:0.53958243},{”id“:”https://openalex.org/C152124472,“wikidata”:https://www.wikidata.org/wiki/Q1204361“,”display_name“:”冗余(工程)“,”level“:2,”score“:0.5248273},{”id“:”https://openalex.org/C36464697,“wikidata”:https://www.wikidata.org/wiki/Q451553“,”display_name“:”可视化“,”级别“:2,”分数“:0.46639225},{”id“:”https://openalex.org/C2780910867网址,“wikidata”:https://www.wikidata.org/wiki/Q1952416“,”display_name“:”多模态“,”level“:2,”score“:0.4578745},{”id“:”https://openalex.org/C192209626,“wikidata”:https://www.wikidata.org/wiki/Q190909“,”display_name“:”焦点(光学)“,”级别“:2,”分数“:0.41904327},{”id“:”https://openalex.org/C23123220,“wikidata”:https://www.wikidata.org/wiki/Q816826“,”display_name“:”信息检索“,”level“:1,”score“:0.37276563},{”id“:”https://openalex.org/C136764020,“wikidata”:https://www.wikidata.org/wiki/Q466“,”display_name“:”万维网“,”level“:1,”score“:0.0730184},{”id“:”https://openalex.org/C36289849,“wikidata”:https://www.wikidata.org/wiki/Q34749“,”display_name“:”社会科学“,”level“:1,”score“:0.0},{”id“:”https://openalex.org/C144024400,“wikidata”:https://www.wikidata.org/wiki/Q21201“,”display_name“:”社会学“,”level“:0,”score“:0.0},{”id“:”https://openalex.org/C111919701,“wikidata”:https://www.wikidata.org/wiki/Q9135“,”display_name“:”操作系统“,”level“:1,”score“:0.0},{”id“:”https://openalex.org/C121332964,“wikidata”:https://www.wikidata.org/wiki/Q413“,”display_name“:”物理“,”级别“:0,”分数“:0.0},{”id“:”https://openalex.org/C120665830,“wikidata”:https://www.wikidata.org/wiki/Q14620“,”display_name“:”Optics“,”level“:1,”score“:0.0}],”mesh“:[],”locations_count“:1.”locations“:[{”is_oa“:false,”landing_page_url“:”https://doi.org/10.1007/978-3-031-26316-3_39“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S106296714“,”“display_name”“:”“计算机科学课堂讲稿”“,”issn_l“:”0302-9743“,”isn“:[”0302-7743“、”1611-3349“],”is_oa“:false,”is_in_doaj“:false,”host_organization“:”https://openalex.org/P4310318900“,”“host_organization_name”:“Springer Science+Business Media”,“host_organization_lineage”:[“https://openalex.org/P431031965","https://openalex.org/P4310318900“],”host_organization_lineage_names“:[”Springer Nature“,”Spring er Science+Business Media“],“type”:“book series”},“license”:null,“licence_id”:nul,“version”:null,“is_accepted”:false,“is_published”:false}],“best_oa_location”:nuld,“sustainable_development_goals”:[{“display_name”:“素质教育”,“id”:”https://metadata.un.org/sdg/4“,”score“:0.89}],”grants“:[],”datasets“:[],”versions“:[】,”referenced_works_count“:28,”referrenced_works“:【”https://openalex.org/W1933349210","https://openalex.org/W1966382373","https://openalex.org/W2053317383","https://openalex.org/W2493916176","https://openalex.org/W2745461083","https://openalex.org/W2809273748","https://openalex.org/W2979382951","https://openalex.org/W2988326850","https://openalex.org/W3004268082","https://openalex.org/W3034336960","https://openalex.org/W3034943799","https://openalex.org/W3035644209","https://openalex.org/W3093385053","https://openalex.org/W3104953317","https://openalex.org/W3106859150","https://openalex.org/W3108319047","https://openalex.org/W3110661548","https://openalex.org/W3115287481","https://openalex.org/W3176851559","https://openalex.org/W3177934633","https://openalex.org/W3179897446","https://openalex.org/W3181159501","https://openalex.org/W3205050305","https://openalex.org/W3205209899","https://openalex.org/W3206082179","https://openalex.org/W3215381707","https://openalex.org/W3215633354","https://openalex.org/W4312263373“],”related_works“:[”https://openalex.org/W4386576699","https://openalex.org/W4377703168","https://openalex.org/W4239305747","https://openalex.org/W2248852396","https://openalex.org/W2141765414","https://openalex.org/W2111997505","https://openalex.org/W207304934网址","https://openalex.org/W2060883057","https://openalex.org/W199123384","https://openalex.org/W1560657467“],”ngrams_url“:”https://api.openalex.org/works/W4322747033/ngrams网站“,”“abstract_inverted_index”:{“基于文本”:[0],“可视”:[1,26,50,59,76115],“问题”:[2,10,62100],“回答”:[3],“(TextVQA)”:[4],“是”:[5],“到”:[6,20,42104121140],“答案”:[7],“a”:[8,15,86134],“文本相关”:[9],“by”:[11],“阅读”:[12],“文本”:[13,30,48,80106148],“”在“:[14,31,70127174],”给定“:[16],”图像“:[17],”其中“:[18,95],”需要”:[19],“共同”:[21],“原因”:[22],“超过”:[23],“三个”:[24],“形式\u2014question,”:[25],“对象”:[27],“和”:[28,49,63,75101108125149159177],“场景”:[29,64,79130],“图像。“:[32],“大多数”:[33],“现有”:[34],“作品”:[35],“杠杆”:[36],“图形”:[37],“或”:[38],“复杂”:[39],“注意”:[40],“机制”:[41],“增强”:[43],“the”:[44,61,99111123128146157179],“interaction”:[45],“between”:[46],“objects”。“:[51],”In“:[52],”this“:[53],”paper“:[54],”observating“:[55],”that“:[56163],”compared“:[57],”with“:58137],”objects“,”:[60],“text”:[65],“models”:[66],74],“外观”:[77],“of”:[78],“also”:[82],“有用”:[83],“我们”:[84132],“提议”:[85]两阶段”:[87],“多模态”:[88],“融合”:[89],“基于”:[90],“方法”:[91165],“用于”:[92],“高性能”:[93],“TextVQA”:[94],“第一”:[96],“语义”:[97],“组合”:[98],“OCR”:[102],“令牌”:[103],“理解”:[105],“更好”:[107],“然后”:[109],“集成”:[110],“组合”:[112],“结果”:[113],“进入”:[114],“特征”:[116],“作为”:[117],“附加“:[118],”信息。“:[119],”“此外,”:[120],“缓解”:[122],“冗余”:[124],“噪音”:[126],“识别”:[129],“文本”,:[131],“开发”:[133],“去噪”:[135],“模块”:[136],“对比”:[138],“损失”:[139],“制造”:[141],“我们的”:[142164],“模型”:[143],“焦点”:[144],“关注”:[145156],“相关”:[147],“因此,“:[150],”获取“:[151],”健壮“:[153],”特征。“:[154],“实验”:[155],“ST-VQA”:[160],“数据集”:[161],“显示”:[162],“实现”:[166],“竞争”:[167],“性能”:[168],“无”:[169],“任何”:[170],“大规模”:[171],“预培训”:[172],“使用”:[173],“最近”:[175],“作品”:[176],“表现优异”:[178],“最先进”:[180],“方法”:[181],“之后”:[182],“存在”:[183],“预先训练。“:[184]},”cited_by_api_url“:”https://api.openalex.org/works?filter=cites:W4322747033“,”counts_by_year“:[{”年“:2024,”cited_by_count“:1}],”更新日期“:”2024-06-04T17:27:37.066389“,”创建日期“:“2023-03-03”}