{“id”:“https://openalex.org/W4226278401“,”doi“:”https://doi.org/10.48550/arxiv.2203.02155“,”title“:”训练语言模型遵循人类反馈指令“,”display_name“:”训练语言模型遵循人类反馈指令“,”publication_year“:2022,”publication_date“:”2022-01-01“,”ids“:{”openalex“:”https://openalex.org/W4226278401“,”doi“:”https://doi.org/10.48550/arxiv.2203.02155“},”language“:”en“,”primary_location“:{”is_oa“:true,”landing_page_url“:”https://arxiv.org/abs/2203.02155“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S4306400194“,”display_name“:”arXiv(康奈尔大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is.in_doaj“:false,”is_core“:fase,”host_organization“:”https://openalex.org/I205783295“,”“host_organization_name”:“康奈尔大学”,“host_organization_lineage”:[“https://openalex.org/I205783295“],”host_organization_lineage_names“:[”康奈尔大学“],“type”:“repository”},“license”:“other-oa”,“licence_id”:“https://openalex.org/licenses/other-oa网站“,”version“:”submittedVersion“,”is_accepted“:false,”is_published“:false},”type“:”preprint“,”type_crossref“:“posted-content”,”indexed_in“:[”arxiv“,”datacite“],”open_access“:{”is_oa“:true,”oa_status“:”green“,”oa_url“:”https://arxiv.org/abs/2203.02155“,”any_repository_has_fulltext“:true},”authorships“:[{”author_position“:”第一“,”作者“:{”id“:”https://openalex.org/A5068949174“,”display_name“:”Long Ouyang“,”orcid“:null},”institutions“:[],”countries“:[],”is_corresponding“:false,”raw_author_name“:”Ouyang-Long“,”raw _affiliation_strings“:],”affiliations“:[/]},{”author_position“:”middle“,”author“:{”id“:”https://openalex.org/A5007570707“,”display_name“:”Jeff Wu“,”orcid“:null},”institutions“:[],”countries“:[],”is_corresponding“:false,”raw_author_name“:”Wu,Jeff“,”raw _affiliation_strings“:],”affiliations“:[/]},{”author_position“:”middle“,”author“:{”id“:”https://openalex.org/A5103934038“,”display_name“:”Xu Jiang“,”orcid“:null},”institutions“:[],”countries“:[],”is_corresponding“:false,”raw_author_name“:”Jiang,Xu“,”raw _affiliation_strings“:],”affiliations“:[/]},{”author_position“:”middle“,”author“:{”id“:”https://openalex.org/A5104285560“,”display_name“:”Diogo Almeida“,”orcid“:null},”institutions“:[],”countries“:[],”is_corresponding“:false,”raw_author_name“,”armeida,Diogo“,”raw _affiliation_strings“:],”affiliations“:[/]},{”author_position“:”middle“,”author“:{”id“:”https://openalex.org/A5022076965“,”display_name“:”Carroll L.Wainwright“,”orcid“:null},”institutions“:[],”countries“:[],”is_corresponding“:false,”raw_author_name“:”Wainwrite,Carroll L“,”raw _affiliation_strings“:],”affiliations“:[/]},{”author_position“:”middle“,”author“:{”id“:”https://openalex.org/A5028772381“,”display_name“:”Pamela Mishkin“,”orcid“:null},”institutions“:[],”countries“:[],”is_corresponding“:false,”raw_author_name“:”Mishkins,Pamela“,”raw _affiliation_strings“:],”afliations“:【】},{”author_position“:”middle“,”author“:{”id“:”https://openalex.org/A5100393899“,”display_name“:”Chong Zhang“,”orcid“:”https://orcid.org/0000-0002-2162-4344“},”机构“:[],”国家“:[[],”is_corresponding“:false,”raw_author_name“:”张,冲“,”raw_affiliation_strings“:[],“隶属关系”:[]},{“author_position”:“middle”,“author”:{“id”:“https://openalex.org/A5057289323“,”display_name“:”Sandhini Agarwal“,”orcid“:null},”institutions“:[],”countries“:[],”is_corresponding“:false,”raw_author_name“:”Agarwal-,Sandhiniti“,”raw _affiliation_strings“:],”affiliations“:【】},{”author_position“:”middle“,”author“:{”id“:”https://openalex.org/A5022205442“,”display_name“:”Katarina Slama“,”orcid“:null},”institutions“:[],”countries“:[],”is_corresponding“:false,”raw_author_name“:”Slama,Katarina”,“raw_affiliation_strings”:[]、“afliations”:[]},{“author_position”:“middle”,“author”:{“id”:“”https://openalex.org/A5065002167“,”display_name“:”Alex Ray“,”orcid“:”https://orcid.org/0000-0001-5760-7821“},”机构“:[],”国家“:[[],”is_corresponding“:false,”raw_author_name“:”Ray,Alex“,”raw关联字符串“:[],“附属关系”:[]},{“author_position”:“middle”,“author”:{“id”:“https://openalex.org/A5102168278“,”display_name“:”John Schulman“,”orcid“:null},”institutions“:[],”countries“:[],”is_corresponding“:false,”raw_author_name“:”Schulman,John“,”raw _affiliation_strings“:],”affiliations“:[/]},{”author_position“:”middle“,”author“:{”id“:”https://openalex.org/A5007861777“,”display_name“:”Jacob Hilton“,”orcid“:”https://orcid.org/0000-0003-1931-9516“},”机构“:[],”国家“:[[],”is_corresponding“:false,”raw_author_name“:”希尔顿,雅各布“,”raw_affiliation_strings“:[],“隶属关系”:[]},{“author_position”:“middle”,“author”:{“id”:“https://openalex.org/A5011207305“,”display_name“:”Fraser Kelton“,”orcid“:null},”institutions“:[],”countries“:[],”is_corresponding“:false,”raw_author_name“:”Kelton,Fraser“,”raw _affiliation_strings“:],”affiliations“:【】},{”author_position“:”middle“,”author“:{”id“:”https://openalex.org/A5037192369“,”display_name“:”卢克·E·米勒“,”兽人“:”https://orcid.org/0000-0002-7865-881X“},”机构“:[],”国家“:[],”is_correresponsing“:false,”raw_author_name“:”Miller,Luke“,”raw_affiation_strings“:[],”附属机构“:[]},{”author_position“:”middle“,”author“:{”id“:”https://openalex.org/A5011719782“,”display_name“:”Maddie Simens“,”orcid“:null},”institutions“:[],”countries“:[],”is_corresponding“:false,”raw_author_name“:”Simens,Maddie“,”raw _affiliation_strings“:],”affiliations“:【】},{”author_position“:”middle“,”author“:{”id“:”https://openalex.org/A5030305998“,”display_name“:”Amanda Askell“,”orcid“:null},”institutions“:[],”countries“:[],”is_corresponding“:false,”raw_author_name“:”Askell,Amanda“,”raw _affiliation_strings“:],”affiliations“:[/]},{”author_position“:”middle“,”author“:{”id“:”https://openalex.org/A5010674841“,”display_name“:”Peter Welinder“,”orcid“:null},”institutions“:[],”countries“:[],”is_corresponding“:false,”raw_author_name“:”Welinder,Peter“,”raw _affiliation_strings“:],”affiliations“:[/]},{”author_position“:”middle“,”author“:{”id“:”https://openalex.org/A5043847536“,”display_name“:”Paul Christiano“,”orcid“:null},”institutions“:[],”countries“:[],”is_corresponding“:false,”raw_author_name“:”Christiano,Paul“,”raw _affiliation_strings“:],”affiliations“:[/]},{”author_position“:”middle“,”author“:{”id“:”https://openalex.org/A5090592321“,”display_name“:”Jan Leike“,”orcid“:null},”institutions“:[],”countries“:[],”is_corresponding“:false,”raw_author_name“:”Leike,Jan“,”raw _affiliation_strings“:],”affiliations“:[/]},{”author_position“:”last“,”author“:{”id“:”https://openalex.org/A5038882588“,”display_name“:”Ryan Lowe“,”orcid“:”https://orcid.org/0000-0002-7080-8406“},”机构“:[],”国家“:[[],”is_corresponding“:false,”raw_author_name“:”Lowe,Ryan“,”raw _ afiliation_strings“:[]],”附属机构“:[]}],”机构评估“:[],“countries _ distinct_count”:0,“机构_ distinct _count“:0,”corresponding_author_ids“:[】,”相应的机构_ id“:[〕,”apc_list“:null,”apc _ paid“:null,“fwci”:nullhas_fulltext“:false,”cited_by_count“:2101,”citation_normalized_percentile“:{”value“:0.99994,”is_in_top_1_percent“:true,”is_ in_top_ 10_percents“:true},”citted_by_count_year“:{“min”:99,“max”:100},“biblio”:{“volume”:null,“issue”:null,“first_page”:nul,“last_page”:null},‘is_retracted’:false“primary_topic”:{“id”:“https://openalex.org/T10028“,”display_name“:”自然语言处理“,”score“:0.9944,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”Physical Sciences“}},”topics“:[{”id“:”https://openalex.org/T10028“,”display_name“:”自然语言处理“,”score“:0.9944,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},{”id“:”https://openalex.org/T10181“,”display_name“:”统计机器翻译与自然语言处理“,”score“:0.9629,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},{”id“:”https://openalex.org/T12026“,”display_name“:”可解释人工智能“,”score“:0.962,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}}],”关键词“:[{”id“:”https://openalex.org/keywords/language-modeling“,”display_name“:”语言建模“,”score“:0.590402},{”id“:”https://openalex.org/keywords/topic-modeling网站“,”display_name“:”主题建模“,”score“:0.554978},{”id“:”https://openalex.org/keywords/syntax-based-translation-models网站“,”display_name“:”基于句法的翻译模型“,”score“:0.520673},{”id“:”https://openalex.org/keywords/part-of-speech-tagging“,”“display_name”“:”部分讲话标记“,”score“:0.519968},{”id“:”https://openalex.org/keywords/responsibility-in-ai“,”display_name“:”AI责任“,”score“:0.517546},{”id“:”https://openalex.org/keywords/training-set“,”display_name“:”训练集“,”score“:0.5044843}],”concepts“:[{”id“:”https://openalex.org/C41008148,“wikidata”:https://www.wikidata.org/wiki/Q21198(网址:https://www.wikidata.org/wiki/Q21198)“,”display_name“:”计算机科学“,”level“:0,”score“:0.8601968},{”id“:”https://openalex.org/C137293760,“wikidata”:https://www.wikidata.org/wiki/Q3621696“,”display_name“:”语言模型“,”level“:2,”score“:0.78678775},{”id“:”https://openalex.org/C177264268,“wikidata”:https://www.wikidata.org/wiki/Q1514741“,”display_name“:”Set(abstract data type)“,”level“:2,”score“:0.77025944},{”id“:”https://openalex.org/C2780586882,“wikidata”:https://www.wikidata.org/wiki/Q7520643“,”display_name“:”Simple(哲学)“,”level“:2,”score“:0.5595702},{”id“:”https://openalex.org/C97541855,“wikidata”:https://www.wikidata.org/wiki/Q830687“,”display_name“:”强化学习“,”level“:2,”score“:0.5496878},{”id“:”https://openalex.org/C154945302,“wikidata”:https://www.wikidata.org/wiki/Q11660“,”display_name“:”人工智能“,”level“:1,”score“:0.5492983},{”id“:”https://openalex.org/C204323151,“wikidata”:https://www.wikidata.org/wiki/Q905424“,”“display_name”“:”范围(航空)“,”级别“:2,”分数“:0.5058255},{”id“:”https://openalex.org/C51632099,“wikidata”:https://www.wikidata.org/wiki/Q3985153“,”“display_name”“:”训练集“,”级别“:2,”分数“:0.5044843},{”id“:”https://openalex.org/C119857082,“wikidata”:https://www.wikidata.org/wiki/Q2539“,”display_name“:”机器学习“,”level“:1,”score“:0.44980398},{”id“:”https://openalex.org/C204321447,“wikidata”:https://www.wikidata.org/wiki/Q30642“,”display_name“:”自然语言处理“,”level“:1,”score“:0.39584786},{”id“:”https://openalex.org/C107457646,“wikidata”:https://www.wikidata.org/wiki/Q207434“,”display_name“:”人\u2013计算机交互“,”level“:1,”score“:0.32342207},{”id“:”https://openalex.org/C199360897,“wikidata”:https://www.wikidata.org/wiki/Q9143“,”display_name“:”编程语言“,”level“:1,”score“:0.097115844},{”id“:”https://openalex.org/C138885662,“wikidata”:https://www.wikidata.org/wiki/Q5891“,”display_name“:”哲学“,”等级“:0,”分数“:0.0},{”id“:”https://openalex.org/C192562407,“wikidata”:https://www.wikidata.org/wiki/Q228736网址“,”display_name“:”材料科学“,”level“:0,”score“:0.0},{”id“:”https://openalex.org/C111472728,“wikidata”:https://www.wikidata.org/wiki/Q9471“,”display_name“:”认识论“,”level“:1,”score“:0.0},{”id“:”https://openalex.org/C159985019,“wikidata”:https://www.wikidata.org/wiki/Q181790“,”display_name“:”Composite material“,”level“:1,”score“:0.0}],”mesh“:[],”locations_count“:2,”locations“:[{”is_oa“:true,”landing_page_url“:”https://arxiv.org/abs/2203.02155“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S4306400194“,”display_name“:”arXiv(康奈尔大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is.in_doaj“:false,”is_core“:fase,”host_organization“:”https://openalex.org/I205783295“,”“host_organization_name”:“康奈尔大学”,“host_organization_lineage”:[“https://openalex.org/I205783295“],”host_organization_lineage_names“:[”康奈尔大学“],“type”:“repository”},“license”:“other-oa”,“licence_id”:“https://openalex.org/licenses/other-oa网站“,”version“:”submittedVersion“,”is_accepted“:false,”is_published“:false},{”is_oa“:false,”landing_page_url“:”https://api.datacite.org/dois/10.48550/arxiv.2203.02155“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S4393179698“,”display_name“:”DataCite API“,”issn_l“:null,”issn“:null,”is_oa“:true,”is_ in_doaj“:false,”is_core“:false,”host_organization“:”https://openalex.org/I4210145204“,”“host_organization_name”:“DataCite”,“host_organization_lineage”:[“https://openalex.org/I4210145204“],”host_organization_lineage_names“:[”DataCite“],“type”:“metadata”},“license”:null,“licence_id”:null,“version”:null}],“best_oa_location”:{“is_oa”:true,“landing_page_url”:“https://arxiv.org/abs/2203.02155“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S4306400194“,”display_name“:”arXiv(康奈尔大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is.in_doaj“:false,”is_core“:fase,”host_organization“:”https://openalex.org/I205783295“,”“host_organization_name”:“康奈尔大学”,“host_organization_lineage”:[“https://openalex.org/I205783295“],”host_organization_lineage_names“:[”康奈尔大学“],“type”:“repository”},“license”:“other-oa”,“licence_id”:“https://openalex.org/licenses/other-oa网站“,”version“:”submittedVersion“,”is_accepted“:false,”is_published“:false},”sustainable_development_goals“:[{”id“:”https://metadata.un.org/sdg/16“,”display_name“:”和平、正义和强大的机构“,”score“:0.64}],”grants“:[],”datasets“:[],”versions“:[】,”referenced_works_count“:0,”referrenced_works“:],”related_work斯“:[”https://openalex.org/W4380318855","https://openalex.org/W4362501864","https://openalex.org/W4306904969","https://openalex.org/W4287644835","https://openalex.org/W3098003361","https://openalex.org/W3092281475","https://openalex.org/W2382521049","https://openalex.org/W2144385241","https://openalex.org/W2138720691","https://openalex.org/W1585007175“],”abstract_inverted_index“:{”Making“:[0],”language“:[1,18,54206],”models“:[2,19,38,55134165207],”bigger“:[3],”does“:[4],”not“:[5,29,40],”固有“:[6],”make“:[7],“them”:[8],”better“:[9],”at“:[10],”following“:[11],”a“:[12,60,72,86108201],”user's“:[13],”intent.“:[14210],”对于“:[15],”示例,“:[16],”大型“:[17],”可以“:[20],”生成“:[21],”输出“:[22143153],”that“:[23195],”are“:[24,39150],”rulthful“,:[25],”toxic“,“:[26],”or“:[27],”simply“:[28],”help“:[30],”to“:[31,99118152],”the“:[32,81,92132145155],”user。“:[33],”In“:[34,45136],”其他“:[35],”单词,“:[36],”这些“:[37],”对齐“:[41],”与“:[42,56,67,71197208],”他们的“:[43],”用户。“:[44],”this“:[46121],”paper“:[47],”we“:[48,84,97116],”show“:[49166194],”an“:[50],”avenue“:[51],”for“:[52204],”aligning“:[53205],”user“:57],”intent“:[58],”on“:[59139181],”wide“:[61],”range“:[62],”of“:[63,74,88,91110112],”tasks“:[24],”“by”:[65],“fine-tuning”:[66196],“human”:[68128137198209],“feedback”。“:[69129],”开始“:[70],”设置“:[73],”标签写入“:[75],”提示“:[76,78],”和“:[77170],”提交“:[79],”通过“:[80],”OpenAI“:[82],”API“:[83],”收集“:[85107],”数据集“:[87109],”标记“:[89],”演示“:[90],”所需“:[93],”模型“:[94113123149],”行为“:[95],“which”:[96115],“use”:[98117],“fine-tune”:[100120],“GPT-3“:[101],”使用“:[102124],”监督“:[103122],”学习。“:[104],“我们”:[105130],“然后”:[106],“排名”:[111],“输出”:[114],“进一步”:[119],“强化”:[125],“学习”:[126],“来自”:[127144154],“调用”:[131],“结果”:[133],“指导GPT。“:[135],”评估“:[138],”我们的“:[140192],”提示“:[141],”分布“:[142],”1.3B“:[146],”参数“:[147],”指令GPT“:[148164187],”首选“:[151],”175B“:%156],”GPT-3“:[157],”尽管“:[158],”有“:[159177],”100x“:[160],”较少“:[161],”参量。“:[162],”“此外,”:[163],“改进”:[167],“in”:[168172],“真实性”:[169],“减少”:[171],“有毒”:[173],“输出”:[174],“生成”:[175],“while”:[176],“最小”:[178],“性能”:[179],“回归”:[180],“公共”:[182],“NLP”:[183],“数据集。“:[184],”Even“:[185],”thought“:[186],”still“:[188],”makes“:[189],”simple“:[190],”errors“,”:[191],“results”:[193],“feedback”:[199],“is”:[200],“looking”:[202],“direction”:[203]},“cited_by_api_url”:“https://api.openalex.org/works?filter=cites:W4226278401“,”counts_by_year“:[{“年”:2024,”cited_by_count“:625},{“年份”:2023,”ciped_by_cunt“:1270},”{“年度”:2022,“cited_by_count”:61},“年份”:2020,”cited_by_count:1}],”updated_date“:2024-09-14T05:48:10.642155”,”created_dated“日期”:“2022-05-05”}