{“id”:“https://openalex.org/W4390380480“,”doi“:”https://doi.org/10.48550/arxiv.2312.15997“,”title“:“通过表示工程将大型语言模型与人类偏好对齐”,”display_name“:”通过表示工程使大型语言模型和人类偏好对齐“,”publication_year“:2023,”publiation_date“:”2023-01-01“,”ids“:{”openalex“:”https://openalex.org/W4390380480“,”doi“:”https://doi.org/10.48550/arxiv.2312.15997“},”language“:”en“,”primary_location“:{”is_oa“:true,”landing_page_url“:”https://arxiv.org/abs/2312.15997“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S4306400194“,”display_name“:”arXiv(康奈尔大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is_ in_doaj“:false,”host_organization“:”https://openalex.org/I205783295“,”“host_organization_name”:“康奈尔大学”,“host_organization_lineage”:[“https://openalex.org/I205783295“],”host_organization_lineage_names“:[”康奈尔大学“],“type”:“repository”},“license”:“cc-by-sa”,“licence_id”:“https://openalex.org/licenses/cc-bysa“,”version“:”submittedVersion“,”is_accepted“:false,”is_published“:false},”type“:”preprint“,”type_crossref“:“posted-content”,”indexed_in“:[”arxiv“,”datacite“],”open_access“:{”is_oa“:true,”oa_status“:”green“,”oa_url“:”https://arxiv.org/abs/2312.15997“,”any_repository_has_fulltext“:true},”authorships“:[{”author_position“:”first“,”author“:{”id“:”https://openalex.org/A5069021125“,”display_name“:”刘文浩“,”orcid“:”https://orcid.org/0000-0001-9757-1077“},”机构“:[],”国家“:[],”is_correresponsing“:false,”raw_author_name“:”Liu,Wenhao“,”raw_affiation_strings“:[]},{”author_position“:”middle“,”author“:{”id“:”https://openalex.org/A5074900624“,”display_name“:”王晓华“,”兽人“:”https://orcid.org/0000-0002-6551-151X“},”机构“:[],”国家“:[[],”is_corresponding“:false,”raw_author_name“:”王,小华“,”raw关联字符串“:[]},{“author_position”:“middle”,“author”:{“id”:“https://openalex.org/A5084819949“,”display_name“:”Min Wu“,”orcid“:”https://orcid.org/0000-0003-4485-975X“},”机构“:[],”国家“:[[],”is_corresponding“:false,”raw_author_name“:”Wu,Muling“,”raw关联字符串“:[]},{“author_position”:“middle”,“author”:{“id”:“https://openalex.org/A5020841033“,”display_name“:”天龙里“,”兽人“:”https://orcid.org/0000-0002-9483-457X“},”机构“:[],”国家“:[[],”is_corresponding“:false,”raw_author_name“:”李,天龙“,”raw关联字符串“:[]},{“author_position”:“middle”,“author”:{“id”:“https://openalex.org/A5063414270“,”display_name“:”翠翠绿“,”兽人“:”https://orcid.org/0000-0002-1913-8742“},”机构“:[],”国家“:[[],”is_corresponding“:false,”raw_author_name“:”吕昌泽“,”raw关联字符串“:[]},{“author_position”:“middle”,“author”:{“id”:“https://openalex.org/A5048752310“,”display_name“:”丽娟钟“,”兽人“:”https://orcid.org/0000-0001-6604-268“},”机构“:[],”国家“:[[],”is_corresponding“:false,”raw_author_name“:”凌子轩“,”raw关联字符串“:[]},{“author_position”:“middle”,“author”:{“id”:“https://openalex.org/A5044832379“,”display_name“:”Jiajun Zhu“,”orcid“:”https://orcid.org/0009-0006-7136-2844“},”机构“:[],”国家“:[[],”is_corresponding“:false,”raw_author_name“:”朱建豪“,”raw关联字符串“:[]},{“author_position”:“middle”,“author”:{“id”:“https://openalex.org/A5076223148“,”display_name“:”岑元Zhang“,”orcid“:null},”institutions“:[],”countries“:[],”is_corresponding“:false,”raw_author_name“:”Zhang,岑元“,”raw _affiliation_string“:[]},{”author_position“:”middle“,”author“:{”id“:”https://openalex.org/A5017835517“,”display_name“:”郑晓青“,”orcid“:”https://orcid.org/0000-0003-4430-5036“},”机构“:[],”国家“:[[],”is_corresponding“:false,”raw_author_name“:郑晓庆”,“raw_affiliation_strings”:[]},{“author_position”:“last”,“author”:{“id”:“https://openalex.org/A508834359“,”display_name“:”轩辕黄“,”兽人“:”https://orcid.org/0000-0001-9197-9426“},”机构“:[],”国家“:[[],”is_corresponding“:false,”raw_author_name“:”黄,宣景“,”raw关联字符串“:[]}],”countries_distict_count“:0,”机构区分计数“:0”,“对应的作者ID”:[]、“对应的机构ID”:[],“apc_list”:null,“apc支付”:null,“has_fulltext”:false,“cited_by_count”:0,“cited_by_percentile_year“:{“min”:0,“max”:78},“biblio”:{卷:null,“问题”:nullhttps://openalex.org/T10028“,”display_name“:”自然语言处理“,”score“:0.9927,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},”主题“:[{”id“:”https://openalex.org/T10028“,”display_name“:”自然语言处理“,”score“:0.9927,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},{”id“:”https://openalex.org/T12031“,”“display_name”:“口语系统的对话行为建模”,“score”:0.9535,“subfield”:{“id”:“https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}}],”关键词“:[{”id“:”https://openalex.org/keywords/reinforcement-learning网站“,”display_name“:”强化学习“,”score“:0.5739},{”id“:”https://openalex.org/keywords/topic-modeling网站“,”display_name“:”主题建模“,”score“:0.533579},{”id“:”https://openalex.org/keywords/word-representation网站“,”display_name“:”单词表示“,”score“:0.52844},{”id“:”https://openalex.org/keywords/user-simulation网站“,”display_name“:”用户模拟“,”score“:0.514764},{”id“:”https://openalex.org/keywords/machine翻译“,”display_name“:”机器翻译“,”score“:0.505038}],”概念“:[{”id“:”https://openalex.org/C2781265381,“wikidata”:https://www.wikidata.org/wiki/Q5710255“,”display_name“:”帮助“,”级别“:2,”分数“:0.8388128},{”id“:”https://openalex.org/C2776359362,“wikidata”:https://www.wikidata.org/wiki/Q2145286“,”display_name“:”Representation(politics)“,”level“:3,”score“:0.7504329},{”id“:”https://openalex.org/C41008148,“wikidata”:https://www.wikidata.org/wiki/Q21198“,”display_name“:”计算机科学“,”level“:0,”score“:0.61449635},{”id“:”https://openalex.org/C14036430网址,“wikidata”:https://www.wikidata.org/wiki/Q3736076网址“,”display_name“:”功能(生物学)“,”级别“:2,”分数“:0.57196784},{”id“:”https://openalex.org/C2779530757,“wikidata”:https://www.wikidata.org/wiki/Q1207505“,”display_name“:”质量(理念)“,”级别“:2,”分数“:0.5445306},{”id“:”https://openalex.org/C2777293324,“wikidata”:https://www.wikidata.org/wiki/Q337349“,”display_name“:”诚实“,”等级“:2,”分数“:0.5346334},{”id“:”https://openalex.org/C9652623,“wikidata”:https://www.wikidata.org/wiki/Q190109“,”display_name“:”Field(mathematics)“,”level“:2,”score“:0.49605402},{”id“:”https://openalex.org/C154945302,“wikidata”:https://www.wikidata.org/wiki/Q11660“,”display_name“:”人工智能“,”level“:1,”score“:0.39079845},{”id“:”https://openalex.org/C107457646,“wikidata”:https://www.wikidata.org/wiki/Q207434“,”display_name“:”人机交互“,”level“:1,”score“:0.34804073},{”id“:”https://openalex.org/C15744967,“wikidata”:https://www.wikidata.org/wiki/Q9418(网址:https://www.wikidata.org/wiki/Q9418)“,”display_name“:”心理学“,”等级“:0,”分数“:0.18853778},{”id“:”https://openalex.org/C77805123,“wikidata”:https://www.wikidata.org/wiki/Q161272“,”display_name“:”社会心理学“,”level“:1,”score“:0.14443213},{”id“:”https://openalex.org/C33923547,“wikidata”:https://www.wikidata.org/wiki/Q395“,”display_name“:”数学“,”等级“:0,”分数“:0.11150819},{”id“:”https://openalex.org/C138885662,“wikidata”:https://www.wikidata.org/wiki/Q5891“,”display_name“:”哲学“,”等级“:0,”分数“:0.0},{”id“:”https://openalex.org/C111472728,“wikidata”:https://www.wikidata.org/wiki/Q9471“,”display_name“:”认识论“,”level“:1,”score“:0.0},{”id“:”https://openalex.org/C78458016,“wikidata”:https://www.wikidata.org/wiki/Q840400“,”display_name“:”进化生物学“,”level“:1,”score“:0.0},{”id“:”https://openalex.org/C94625758,“wikidata”:https://www.wikidata.org/wiki/Q7163“,”display_name“:”政治“,”级别“:2,”分数“:0.0},{”id“:”https://openalex.org/C17744445,“wikidata”:https://www.wikidata.org/wiki/Q36442“,”display_name“:”政治学“,”level“:0,”score“:0.0},{”id“:”https://openalex.org/C202444582,“wikidata”:https://www.wikidata.org/wiki/Q837863“,”display_name“:”纯数学“,”level“:1,”score“:0.0},{”id“:”https://openalex.org/C199539241,“wikidata”:https://www.wikidata.org/wiki/Q7748“,”display_name“:”Law“,”level“:1,”score“:0.0},{”id“:”https://openalex.org/C86803240,“wikidata”:https://www.wikidata.org/wiki/Q420“,”display_name“:”Biology“,”level“:0,”score“:0.0}],”mesh“:[],”locations_count“:2,”location“:[{”is_oa“:true,”landing_page_url“:”https://arxiv.org/abs/2312.15997“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S4306400194“,”display_name“:”arXiv(康奈尔大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is_ in_doaj“:false,”host_organization“:”https://openalex.org/I205783295“,”“host_organization_name”:“康奈尔大学”,“host_organization_lineage”:[“https://openalex.org/I205783295“],”host_organization_lineage_names“:[”康奈尔大学“],”type“:”repository“},”license“:”cc by sa“,”license_id“:”https://openalex.org/licenses/cc-by-sa“,”version“:”submittedVersion“,”is_accepted“:false,”is_published“:false},{”is_oa“:false,”landing_page_url“:”https://api.datacite.org/dois/10.48550/arxiv.2312.15997“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S4393179698“,”display_name“:”DataCite API“,”issn_l“:null,”issn“:null,”is_oa“:true,”is_ in_doaj“:false,”host_organization“:”https://openalex.org/I4210145204“,”“host_organization_name”:“DataCite”,“host_organization_lineage”:[“https://openalex.org/I4210145204“],”host_organization_lineage_names“:[”DataCite“],“type”:“metadata”},“license”:null,“licence_id”:null,“version”:null}],“best_oa_location”:{“is_oa”:true,“landing_page_url”:“https://arxiv.org/abs/2312.15997“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S4306400194“,”display_name“:”arXiv(康奈尔大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is_ in_doaj“:false,”host_organization“:”https://openalex.org/I205783295“,”“host_organization_name”:“康奈尔大学”,“host_organization_lineage”:[“https://openalex.org/I205783295“],”host_organization_lineage_names“:[”康奈尔大学“],“type”:“repository”},“license”:“cc-by-sa”,“licence_id”:“https://openalex.org/licenses/cc-bysa“,”version“:”submittedVersion“,”is_accepted“:false,”is_published“:false},”sustainable_development_goals“:[{”score“:0.59,”display_name“:”和平、正义和强大的机构“,”id“:”https://metadata.un.org/sdg/16“}],”grants“:[],”datasets“:[],”versions“:[】,”referenced_works_count“:0,”referrenced_works“:],”related_work斯“:[”https://openalex.org/W4285360723","https://openalex.org/W4281847990","https://openalex.org/W3037056935","https://openalex.org/W29342621214","https://openalex.org/W2613921548","https://openalex.org/W2611407113","https://openalex.org/W2488228222","https://openalex.org/W2092282862","https://openalex.org/W2002563848","https://openalex.org/W1498449133“],”ngrams_url“:”https://api.openalex.org/works/W4390380480/ngrams网站“,”“abstract_inverted_index”:{“Aliging”:[0],“large”:[1],“language”:[2],“models”:[3],“(LLM)”:[4],“with”:[5141],“human”:[6,35,43,83146169],“preferences”:[7,84147170],“is”:[8,54],“critical”:/9],“for”:[10,25,81174],“enhancement”:[11],“their”:[12],“utility”:[13],“in”:[14,63,86131166],“术语”:[15],“of”:[16,49,70,88,97129145],“帮助”:[17]真实性,“:[18],”安全性“:[19],”无害性“:[20],”和“:[21,60,93121],”趣味性。“:[22],“现有”:[23],“方法”:[24],“实现”:[26],“这”:[27,74],“对齐”:[28],“经常”:[29],“涉及”:[30],“使用”:[31],“强化”:[32],“学习”:[33],“来自”:[34,66111],“反馈”:[36],“(RLHF)”:[37],“到”:[38,56,77116123139154],“微调”:[39],“LLMs”:[40],“基于”:[41],“依据”:[42],“标签”:[44],“评估”:[45],““:[46,67127],”相对“:[47],”质量“:[48],”模型“:[50,98],”响应。“:[51],”然而,“:[52],”RLHF“:[53],”易受影响“:[55],”不稳定“:[57],”期间“:[58],”微调“:[59],”呈现“:[61],”挑战“:[62],”实现。绘图:[64],“灵感”:[65],“新兴”:[68],“领域”:[69],“表征”:[71],“工程”:[72],“(RepE)”:[73],“研究”:[75],“目标”:[76],“识别”:[78],“相关”:[79],“表述”:[80138],“高级”:[82],“嵌入”:[85],“模式”:[87],“活动”:[89],“内部”:[90],“an”:[91],“LLM,“:[92],”实现“:[94],”精确“:[95],”控制“:[96],”行为“:[99],”by“:[100],”transforming“:[101],”its“:[102172],”representations。“:[103],“This”:[104],“novel”:[105],“approach”,“:[106],“indicated”:%107],“as”:[108],“Representation”:[109],“Alignment”:[110],“Human”:[112],“Feedback”:[113],“(RAHF),”:[114],“provides”:[115],“be”:[117],“effective”:+118],“computationally”:[119],“高效”:[120],“easy”:/12],“implement”。广泛”:[124],“实验”:[125],“演示”:[126],“功效”:[128],“RAHF”:[130],“非”:[132],“仅”:[133],“捕获”:[134],“但”:[135],“还”:[136],“操纵”:[137],“对齐”:[140],“a”:[142155],“广泛”:%143],“光谱”:[144],“或”:[148158162],“值”:[149],“而不是”:[150],“比”:[151],“存在”:[152],“受限”:[153],“单数”:[156]概念“:[157],”功能“:[159],”(例如“:[160],”诚实“:[161],”偏见)。“:[163],”RAHF’s“:[164],”多功能“:[165],”适应性“:[167],”多样性“:[168],”表演“:[171],”潜力“:[173],”前进“:[175],”LLM“:[176],”性能。“:[177]},”cited_by_api_url“:”https://api.openalex.org/works?filter=cites:W4390380480“,”counts_by_year“:[],”updated_date“:”2024-05-25T10:34:44.855183“,”created_date:“2023-12-29”}“