{“id”:“https://openalex.org/W4388748280“,”doi“:”https://doi.org/10.48550/arxiv.2311.09006“,”title“:”Data Similarity is Not Enough to Explain Language Model Performance“,”display_name“:”数据相似性不足以解释语言模型性能“,”publication_year“:2023,”publitation_date“:”2023-01-01“,”ids“:{”openalex“:”https://openalex.org/W4388748280“,”doi“:”https://doi.org/10.48550/arxiv.2311.09006“},”language“:”en“,”primary_location“:{”is_oa“:true,”landing_page_url“:”https://arxiv.org/abs/2311.09006“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S4306400194“,”display_name“:”arXiv(康奈尔大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is_ in_doaj“:false,”host_organization“:”https://openalex.org/I205783295“,”host_organization_name“:”康奈尔大学“,”host_organization_lineage“:[”https://openalex.org/I205783295“],”host_organization_lineage_names“:[”康奈尔大学“],“type”:“repository”},“license”:“other-oa”,“licence_id”:“https://openalex.org/licenses/other-oa“,”version“:null,”is_accepted“:false,”is_published“:false},”type“:预打印”,”type_crossref“:”journal-article“,”indexed_in“:[”arxiv“,”datacite“],”open_access“:{”is_oa“:true,”oa_status“:”green“,”oa_url“:”https://arxiv.org/abs/2311.09006“,”any_repository_has_fulltext“:true},”authorships“:[{”author_position“:”第一“,”作者“:{”id“:”https://openalex.org/A5033032726“,”display_name“:”Gregory Yauney“,”orcid“:null},”institutions“:[],”countries“:[],”is_corresponding“:false,”raw_author_name“:”Yauney,Gregory“,”raw _affiliation_strings“:]},{”author_position“:”middle“,”author“:{”id“:”https://openalex.org/A5019880413“,”display_name“:”Emily Reif“,”orcid“:”https://orcid.org/0000-0003-3572-6234“},”机构“:[],”国家“:[[],”is_corresponding“:false,”raw_author_name“:”Reif,Emily“,”raw关联字符串“:[]},{“author_position”:“last”,“author”:{“id”:“”https://openalex.org/A5086934220“,”display_name“:”David Mimno“,”orcid“:”https://orcid.org/0000-0001-7510-9404“},”机构“:[],”国家“:[[],”is_corresponding“:false,”raw_author_name“:”Mimno,David“,”raw关联字符串“:[]}],”countries_distict_count“:0,”机构_distiction_count“:0,,”corresponding_author_ids“:[],0,“cited_by_percentile_year“:{“min”:0,“max”:78},“biblio”:{卷:null,“问题”:nullhttps://openalex.org/T10028“,”display_name“:”自然语言处理“,”score“:0.9992,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},”主题“:[{”id“:”https://openalex.org/T10028“,”display_name“:”自然语言处理“,”score“:0.9992,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},{”id“:”https://openalex.org/T10181“,”display_name“:”统计机器翻译和自然语言处理“,”score“:0.9979,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},{”id“:”https://openalex.org/T13629“,”display_name“:”自动文本简化和可读性评估“,”score“:0.9161,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}}],”关键词“:[{”id“:”https://openalex.org/keywords/language-modeling“,”display_name“:”语言建模“,”score“:0.562082},{”id“:”https://openalex.org/keywords/semantic-similarity网站“,”display_name“:”语义相似度“,”score“:0.552512},{”id“:”https://openalex.org/keywords/topic建模“,”display_name“:”Topic Modeling“,”score“:0.540857},{”id“:”https://openalex.org/keywords/统计语言模型“,”display_name“:”统计语言模型“,”score“:0.529322},{”id“:”https://openalex.org/keywords/dependency-parsing“,”display_name“:”依赖关系分析“,”score“:0.505343}],”concepts“:[{”id“:”https://openalex.org/C103278499,“wikidata”:https://www.wikidata.org/wiki/Q254465“,”display_name“:”相似性(几何)“,”level“:3,”score“:0.7855175},{”id“:”https://openalex.org/C41008148,“wikidata”:https://www.wikidata.org/wiki/Q21198“,”display_name“:”计算机科学“,”level“:0,”score“:0.7029978},{”id“:”https://openalex.org/C2780451532,“wikidata”:https://www.wikidata.org/wiki/Q759676“,”display_name“:”Task(project management)“,”level“:2,”score“:0.6352569},{”id“:”https://openalex.org/C1960 83921,“wikidata”:https://www.wikidata.org/wiki/Q7915758“,”display_name“:”差异(会计)“,”级别“:2,”分数“:0.62072104},{”id“:”https://openalex.org/C2776207758,“wikidata”:https://www.wikidata.org/wiki/Q5303302“,”display_name“:”下游(制造)“,”level“:2,”score“:0.5611152},{”id“:”https://openalex.org/C41608201,“wikidata”:https://www.wikidata.org/wiki/Q980509“,”display_name“:”Embedding“,”level“:2,”score“:0.50923383},{”id“:”https://openalex.org/C137293760,“wikidata”:https://www.wikidata.org/wiki/Q3621696“,”display_name“:”语言模型“,”level“:2,”score“:0.5007255},{”id“:”https://openalex.org/C154945302,“wikidata”:https://www.wikidata.org/wiki/Q11660“,”display_name“:”人工智能“,”level“:1,”score“:0.46832755},{”id“:”https://openalex.org/C204321447,“wikidata”:https://www.wikidata.org/wiki/Q30642“,”display_name“:”自然语言处理“,”level“:1,”score“:0.4584645},{”id“:”https://openalex.org/C2778755073,“wikidata”:https://www.wikidata.org/wiki/Q10858537“,”display_name“:”Scale(ratio)“,”level“:2,”score“:0.45297733},{”id“:”https://openalex.org/C124101348,“wikidata”:https://www.wikidata.org/wiki/Q172491“,”display_name“:”数据挖掘“,”level“:1,”score“:0.33046},{”id“:”https://openalex.org/C21547014,“wikidata”:https://www.wikidata.org/wiki/Q1423657“,”display_name“:”操作管理“,”level“:1,”score“:0.0},{”id“:”https://openalex.org/C144133560,“wikidata”:https://www.wikidata.org/wiki/Q4830453“,”display_name“:”Business“,”level“:0,”score“:0.0},{”id“:”https://openalex.org/C121332964,“wikidata”:https://www.wikidata.org/wiki/Q413“,”display_name“:”物理“,”级别“:0,”分数“:0.0},{”id“:”https://openalex.org/C187736073,“wikidata”:https://www.wikidata.org/wiki/Q2920921“,”display_name“:”管理“,”级别“:1,”分数“:0.0},{”id“:”https://openalex.org/C121955636,“wikidata”:https://www.wikidata.org/wiki/Q4116214“,”display_name“:”Accounting“,”level“:1,”score“:0.0},{”id“:”https://openalex.org/C62520636,“wikidata”:https://www.wikidata.org/wiki/Q944“,”display_name“:”量子力学“,”level“:1,”score“:0.0},{”id“:”https://openalex.org/C162324750,“wikidata”:https://www.wikidata.org/wiki/Q8134“,”display_name“:”经济学“,”level“:0,”score“:0.0},{”id“:”https://openalex.org/C115961682,“wikidata”:https://www.wikidata.org/wiki/Q860623“,”display_name“:”Image(mathematics)“,”level“:2,”score“:0.0}],”mesh“:[],”locations_count“:2中,”location“:[{”is_oa“:true,”landing_page_url“:”https://arxiv.org/abs/2311.09006“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S4306400194“,”display_name“:”arXiv(康奈尔大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is_ in_doaj“:false,”host_organization“:”https://openalex.org/I205783295“,”host_organization_name“:”康奈尔大学“,”host_organization_lineage“:[”https://openalex.org/I205783295“],”host_organization_lineage_names“:[”康奈尔大学“],“type”:“repository”},“license”:“other-oa”,“licence_id”:“https://openalex.org/licenses/other-oa网站“,”版本“:null,”is_accepted“:false,”is_published“:false},{”is_oa“:false,”landing_page_url“:”https://api.datacite.org/dois/10.48550/arxiv.2311.09006“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S4393179698“,”display_name“:”DataCite API“,”issn_l“:null,”issn“:null,”is_oa“:true,”is_ in_doaj“:false,”host_organization“:”https://openalex.org/I4210145204“,”“host_organization_name”:“DataCite”,“host_organization_lineage”:[“https://openalex.org/I4210145204“],”host_organization_lineage_names“:[”DataCite“],“type”:“metadata”},“license”:null,“licence_id”:null,“version”:null}],“best_oa_location”:{“is_oa”:true,“landing_page_url”:“https://arxiv.org/abs/2311.09006“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S4306400194“,”display_name“:”arXiv(康奈尔大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is_ in_doaj“:false,”host_organization“:”https://openalex.org/I205783295“,”host_organization_name“:”康奈尔大学“,”host_organization_lineage“:[”https://openalex.org/I205783295“],”host_organization_lineage_names“:[”康奈尔大学“],“type”:“repository”},“license”:“other-oa”,“licence_id”:“https://openalex.org/licenses/other-oa“,”version“:null,”is_accepted“:false,”is_published“:false},”sustainable_development_goals“:[{”display_name“:”素质教育“,”score“:0.56,”id“:”https://metadata.un.org/sdg/4“}],”grants“:[],”datasets“:[],”versions“:[】,”referenced_works_count“:0,”referrenced_works“:],”related_work斯“:[”https://openalex.org/W4249524554","https://openalex.org/W4214653257","https://openalex.org/W35583307","https://openalex.org/W3040203686","https://openalex.org/W2521424917","https://openalex.org/W2436192316","https://openalex.org/W2349021146","https://openalex.org/W2081900870","https://openalex.org/W2055438207","https://openalex.org/W1583765404“],”ngrams_url“:”https://api.openalex.org/works/W4388748280/ngrams网站“,”“abstract_inverted_index”:{“大”:[0],“语言”:[1,63],“模型”:[2],“实现”:[3],“高”:[4],“性能”:[5,65,83],“on”:[6],“多”:[7],“但是”:[8,87],“不是”:[9,98],“全部”:[10],“下游”:[11,78115],“任务。”:[12],“The”:[13],“interaction”:[14],11],“预训练”:[16,39,75112],“数据”:[17,20,31,40113],“和”:[18,53,59,73114],“任务“:[19,29],”是“:[21,33,41117],”常用“:[22],”假定“:[23,42],”to“:[24,36,43],”确定“:[25],”此“:[26],”方差:“:[27],”a“:[28,37,67],”with“:[30,62,77,82100],”that“:[32,47,94108],”more“:[34118],”相似“:[35],”模型“:[38],”be“:[44],“更容易”:[45],“用于”:[46,84],“模型。“:[48],“我们”:[49],“测试”:[50],“是否”:[51],“分布”:[52],“特定示例”:[54],“相似性”:[55,95],“度量值”:[56],“(嵌入-,”:[57],“标记-”:[58],“基于模型)”:[60],“相关”:[61],“模型”:[64],“通过”:[66],“大规模”:[68],“比较”:[69],“的”:[70],“:[71109],“桩”:[72],“C4”:[74],“数据集”:[76],“基准。“:[79],“相似性”:[80],“相关性”:[81],“多语言”:[85],“数据集”:[86],“in”:[88],“other”:[89],“基准,”:[90],“we”:【91】,“令人惊讶”:[92],“find”:【93】,“metrics”:【96】,“are”:〔97〕,“correlated”:[99],“准确性”:【101】,“or”:【102】,“even”:【103】,“each”:【104】,“其他。“:[105],”This“:[106],”suggestes“:[107],”relationship“:[110],”tasks“:[116],”complex“:[119],”than“:[120],”commery“:[121],”assument。“:[122]},”cited_by_api_url“:”https://api.openalex.org/works?filter=cites:W4388748280“,”counts_by_year“:[],”updated_date“:”2024-05-25T23:46:08.751516“,”created_date“:”2023-11-17“}