{“id”:“https://openalex.org/W4387596586“,”doi“:”https://doi.org/10.48550/arxiv.2310.07641“,”title“:”在评估指令遵循时评估大型语言模型“,”display_name“:”评估指令遵循中评估大型语言模式“,”publication_year“:2023,”publiation_date“:”2023-01-01“,”ids“:{”openalex“:”https://openalex.org/W4387596586“,”doi“:”https://doi.org/10.48550/arxiv.2310.07641“},”language“:”en“,”primary_location“:{”is_oa“:true,”landing_page_url“:”https://arxiv.org/abs/2310.07641“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S4306400194“,”display_name“:”arXiv(康奈尔大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is_ in_doaj“:false,”host_organization“:”https://openalex.org/I205783295“,”“host_organization_name”:“康奈尔大学”,“host_organization_lineage”:[“https://openalex.org/I205783295“],”host_organization_lineage_names“:[”康奈尔大学“],“type”:“repository”},“license”:“cc-by”,“licence_id”:“https://openalex.org/licenses/cc-by“,”version“:”submittedVersion“,”is_accepted“:false,”is_published“:false},”type“:”preprint“,”type_crossref“:“posted-content”,”indexed_in“:[”arxiv“,”datacite“],”open_access“:{”is_oa“:true,”oa_status“:”green“,”oa_url“:”https://arxiv.org/abs/2310.07641“,”any_repository_has_fulltext“:true},”authorships“:[{”author_position“:”first“,”author“:{”id“:”https://openalex.org/A5075168326“,”display_name“:”曾志远“,”orcid“:”https://orcid.org/0000-0002-3979-3322“},”机构“:[],”国家“:[[],”is_corresponding“:false,”raw_author_name“:”曾致远“,”raw关联字符串“:[]},{“author_position”:“middle”,“author”:{“id”:“https://openalex.org/A5083571765“,”display_name“:”Jian Yu“,”orcid“:”https://orcid.org/0000-0001-8711-6679“},”机构“:[],”国家“:[[],”is_corresponding“:false,”raw_author_name“:”Yu,Jiatong“,”raw关联字符串“:[]},{“author_position”:“middle”,“author”:{“id”:“https://openalex.org/A5035139834“,”display_name“:”天宇高“,”兽人“:”https://orcid.org/0009-0001-5988-7967“},”机构“:[],”国家“:[],”is_correresponsing“:false,”raw_author_name“:”Gao,Tianyu“,”raw_affiation_strings“:[]},{”author_position“:”middle“,”author“:{”id“:”https://openalex.org/A5014499721“,”display_name“:”虞宋梦“,”兽人“:”https://orcid.org/0009-0007-4934-9392“},”机构“:[],”国家“:[],”is_corresponding“:false,”raw_author_name“:”Meng,Yu“,”raw_affiation_strings“:[]},{”author_position“:”middle“,”author“:{”id“:”https://openalex.org/A502127239“,”display_name“:”Tanya Goyal“,”orcid“:null},”institutions“:[],”countries“:[],”is_corresponding“:false,”raw_author_name“:”Goyal,Tanya.“,”raw _affiliation_strings“:]},{”author_position“:”last“,”author“:{”id“:”https://openalex.org/A5051064208“,”display_name“:”陈丹琪“,”orcid“:”https://orcid.org/0000-0002-6226-6838“},”机构“:[],”国家“:[',”is_corresponding“:false,”raw_author_name“:”陈丹奇“,”raw关联字符串“:[]}],”countries_distict_count“:0,”机构区分计数“:0,“cited_by_percentile_year“:{“min”:87,“max”:92},“biblio”:{”volume“:null,”issue“:null,”first_page“:null},”is_retracted“:false,”is_paratext“:false,”primary_topic“:”{“id”:“https://openalex.org/T13629“,”display_name“:”自动文本简化和可读性评估“,”score“:0.9639,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},”主题“:[{”id“:”https://openalex.org/T13629“,”display_name“:”自动文本简化和可读性评估“,”score“:0.9639,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},{”id“:”https://openalex.org/T10181“,”display_name“:”统计机器翻译和自然语言处理“,”score“:0.9639,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}}],”关键词“:[{”id“:”https://openalex.org/keywords/language-modeling“,”display_name“:”语言建模“,”score“:0.745299},{”id“:”https://openalex.org/keywords/statistical-language-models(https://openalex.org/keywords/statistical-language-models)“,”display_name“:”统计语言模型“,”score“:0.689764},{”id“:”https://openalex.org/keywords/natural-language-processing“,”display_name“:”自然语言处理“,”score“:0.664296},{”id“:”https://openalex.org/keywords/syntax-based-translation-models网站“,”display_name“:”基于句法的翻译模型“,”score“:0.651887},{”id“:”https://openalex.org/keywords/complex-word-identification网站“,”display_name“:”复杂词识别“,”score“:0.628976}],”concepts“:[{”id“:”https://openalex.org/C41008148,“wikidata”:https://www.wikidata.org/wiki/Q21198“,”display_name“:”计算机科学“,”level“:0,”score“:0.5388266},{”id“:”https://openalex.org/C204321447,“wikidata”:https://www.wikidata.org/wiki/Q30642“,”display_name“:”自然语言处理“,”level“:1,”score“:0.38382918}],”mesh“:[],”locations_count“:2,”location“:[{”is_oa“:true,”landing_page_url“:”https://arxiv.org/abs/2310.07641“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S4306400194“,”display_name“:”arXiv(康奈尔大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is_ in_doaj“:false,”host_organization“:”https://openalex.org/I205783295“,”“host_organization_name”:“康奈尔大学”,“host_organization_lineage”:[“https://openalex.org/I205783295“],”host_organization_lineage_names“:[”康奈尔大学“],“type”:“repository”},“license”:“cc-by”,“licence_id”:“https://openalex.org/licenses/cc-by“,”version“:”submittedVersion“,”is_accepted“:false,”is_published“:false},{”is_oa“:false,”landing_page_url“:”https://api.datacite.org/dois/10.48550/arxiv.2310.07641“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S4393179698“,”display_name“:”DataCite API“,”issn_l“:null,”issn“:null,”is_oa“:true,”is_ in_doaj“:false,”host_organization“:”https://openalex.org/I4210145204“,”“host_organization_name”:“DataCite”,“host_organization_lineage”:[“https://openalex.org/I4210145204“],”host_organization_lineage_names“:[”DataCite“],“type”:“metadata”},“license”:null,“licence_id”:null,“version”:null}],“best_oa_location”:{“is_oa”:true,“landing_page_url”:“https://arxiv.org/abs/2310.07641“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S4306400194“,”display_name“:”arXiv(康奈尔大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is_ in_doaj“:false,”host_organization“:”https://openalex.org/I205783295“,”“host_organization_name”:“康奈尔大学”,“host_organization_lineage”:[“https://openalex.org/I205783295“],”host_organization_lineage_names“:[”康奈尔大学“],“type”:“repository”},“license”:“cc-by”,“licence_id”:“https://openalex.org/licenses/cc-by“,”version“:”submittedVersion“,”is_accepted“:false,”is_published“:false},”sustainable_development_goals“:[{”display_name“:”素质教育“,”score“:0.56,”id“:”https://metadata.un.org/sdg/4“}],”grants“:[],”datasets“:[],”versions“:[】,”referenced_works_count“:0,”referrenced_works“:],”related_work斯“:[”https://openalex.org/W4391913857","https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2530322880","https://openalex.org/W2390279801","https://openalex.org/W2382290278","https://openalex.org/W2376932109","https://openalex.org/W2358668433","https://openalex.org/W2350741829","https://openalex.org/W2001405890“],”ngrams_url“:”https://api.openalex.org/works/W4387596586/ngrams网站“,”“abstract_inverted_index”:{“As”:[0],“research”:[1175],“in”:[2,41,77176],“large”:[3],“language”:[4],“models”:[5],“(LLM)”:[6],“continues”:[7],“to”:[8,20,44,57,69,91113165],“accelerate”:[9],“基于LLM”:[10],“evaluation”:[11],“has”:[12],“emerged”:[13],“As”:[14],“a”:[15,48,63108145],“可扩展”:[16],“和”:[17125132158172],“成本效益”:[18],“替代”:[19],“人类”:[21159],,“评估”:[22],“for”:[23140],“comparing”:[24],“the”:[25,34,58,71,94134154],“ever”:[26],“increased”:[27],“list”:[2],“of”:[29,36,73,87123148],“models。“:[30180],”This“:[31],”paper“:[32],”investments“:[33],”effective“:[35],”these“:[37],”`LLM“:[38],”evaluators'“,”:[39],“special”:[40],“using”:[42],“theme”:[43],“assessment”:[45],“instruction”:[46],“following”,“metric”:[P9],“that”:[50102118151],“gauges”:[51],“how”:[52],“紧密”:[53],“生成”:[54],“文本”:[55],“坚持”:[56],“给定”:[59],“指令”:[60],“我们”:[61142],“介绍”:[62],“挑战”:[64],“元评估”:[65],“基准”:[66],“LLMBar”:[67162],“设计”:[68],“测试”:[70],“能力”:[72],“an”:[74104],“法学硕士”:[75105157170],“评估者”:[76],“辨别”:[78],“指令允许”:[79179],“输出”:[80],“The”:[81],“authors”:[82],“manually”:[83],“cured”:[84],“419”:[85],“对”:[86],“输出”,:[88],“一”:[89],“坚持”:[90],“指令”:[92],“while”:[93],“其他”:[95],“发散”:[96],“yet”:[97],“may”:[98],“拥有”:[99],“欺骗性”:[100],“质量”:[101],“误导”:[103],“评估者”:[106],“例如”:[107],“更多”:[109167],“迷人”:[110],“音调”:[111],“相反”:[112],“现有”:[114],“元评估”:[115],“我们”:[1116163],“发现”:[117],“不同”:[119],“评估者”:[120171],“(即,”:[121],“组合”:[122],“LLM”:[124],“提示”):[126],“展示”:[127],“独特”:[128],“性能”:[129],“on”:[130],“LLMBar”:[131],“偶数”:[133],“最高分位数”:[135],“个数”:[136],“有”:[137],“实质”:[138],“房间”:[139],“改进。”:[141],“也”:[143],“现在”:[144],“小说”:[146],“套房”:[147],“提示”:[149],“策略”:[150],“进一步”:[152],“接近”:[153],“差距”:[155],“介于”:[156],“评估者”。未来”:[174],“发展中”:[177],“更好”:[178]},“cited_by_api_url”:“https://api.openalex.org/works?filter=cites:W4387596586“,”counts_by_year“:[{”年“:2024,”引用_by_count“:1}],”更新日期“:”2024-05-28T08:49:49.982060“,”创建日期“:“2023-10-13”}