{“id”:“https://openalex.org/W4392121074“,”doi“:”https://doi.org/10.48550/arxiv.2402.14688“,”title“:”Q-Probe:语言模型奖励最大化的轻量级方法“,”display_name“:”Q-Probe:A Lightweight Approach to Reward Maximization for Language Models“,”publication_year“:2024,”publitation_date“:”2024-02-22“,”ids“:{”openalex“:”https://openalex.org/W4392121074“,”doi“:”https://doi.org/10.48550/arxiv.2402.14688“},”language“:”en“,”primary_location“:{”is_oa“:true,”landing_page_url“:”https://arxiv.org/abs/2402.14688,“pdf_url”:https://arxiv.org/pdf/2402.14688,“源”:{“id”:https://openalex.org/S4306400194“,”display_name“:”arXiv(康奈尔大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is_in_doaj“:false,”is_core“:false,”host_organization“:”https://openalex.org/I205783295“,”“host_organization_name”:“康奈尔大学”,“host_organization_lineage”:[“https://openalex.org/I205783295“],”host_organization_lineage_names“:[”Cornell University“],“type”:“repository”},“license”:null,“licence_id”:null,“version”:“submittedVersion”,“is_accepted”:false,“is_published”:false},”type“:”preprint“,”type_crossref“:“posted-content”,“indexed_in”:[”arxiv“],‘open_access’:{”is_oa“:true,”“oa_status”:“green”,“oa_url”:“https://arxiv.org/pdf/2402.14688“,”any_repository_has_fulltext“:true},”authorships“:[{”author_position“:”第一“,”作者“:{”id“:”https://openalex.org/A5102795202“,”display_name“:”Kenneth Li“,”orcid“:”https://orcid.org/0009-0007-9132-915X“},”机构“:[],”国家“:[[],”is_corresponding“:false,”raw_author_name“:”Li,Kenneth“,”raw_affiliation_strings“:[],“affiliations”:[]},{“author_position”:“middle”,“author”:{“id”:“https://openalex.org/A5048166612“,”display_name“:”Samy Jelassi“,”orcid“:null},”institutions“:[],”countries“:[],”is_corresponding“:false,”raw_author_name“:”Jelassi,Samy“,”raw _affiliation_strings“:],”affiliations“:【】},{”author_position“:”middle“,”author“:{”id“:”https://openalex.org/A5104324469“,”display_name“:”Hugh Zhang“,”orcid“:null},”institutions“:[],”countries“:[],”is_corresponding“:false,”raw_author_name“:”Zhang,Hugh“,”raw _affiliation_strings“:],”affiliations“:[/]},{”author_position“:”middle“,”author“:{”id“:”https://openalex.org/A5018792915“,”display_name“:”Sham M.Kakade“,”orcid“:null},”institutions“:[],”countries“:[],”is_corresponding“:false,”raw_author_name“:”Kakade,Sham“,”raw _affiliation_strings“:],”affiliations“:【】},{”author_position“:”middle“,”author“:{”id“:”https://openalex.org/A5039276358“,”display_name“:”Martin Wattenberg“,”orcid“:”https://orcid.org/0000-0003-0904-4862“},”机构“:[],”国家“:[],”is_correresponsing“:false,”raw_author_name“:”Wattenberg,Martin“,”raw_affiation_strings“:[],”附属“:[]},{”author_position“:”last“,”author“:{”id“:”https://openalex.org/A5037978510“,”display_name“:”David Brandfonbrener“,”orcid“:null},”institutions“:[],”countries“:[],”is_corresponding“:false,”raw_author_name“,”Brandfonprener,David“,”raw _affiliation_strings“:],”affiliations“:【】}“corresponding_institution_ids“:[],”apc_list“:null,”apc _ payed“:null,”fwci“:null,”has_fulltext“:false,”cited_by_count“:0,”citation_normalized_percentile“:{”value“:0.0,”is_in_top_1_percent“:false,”is_ in_top_ 10_percennt“:false},”cited_by_percentile_year“:{“min”:0,“max”:85},“biblio”:{“volume”:null、“issue”:null,“first_page”:空,“last_page”:空},“is_retracted“:false,”is_paratext“:fase,”primary_topic“:{”id“:”https://openalex.org/T10028“,”display_name“:”自然语言处理“,”score“:0.9871,”subfield“:{”id“:”https://openalex.org/subfields/s702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},”主题“:[{”id“:”https://openalex.org/T10028“,”display_name“:”自然语言处理“,”score“:0.9871,”subfield“:{”id“:”https://openalex.org/subfields/s702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},{”id“:”https://openalex.org/T10181“,”display_name“:”统计机器翻译与自然语言处理“,”score“:0.9858,”subfield“:{”id“:”https://openalex.org/subfields/s702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}}],”关键词“:[{”id“:”https://openalex.org/keywords/maximization网站“,”display_name“:”Maximization“,”score“:0.6311868},{”id“:”https://openalex.org/keywords/language-modeling“,”display_name“:”语言建模“,”score“:0.558189},{”id“:”https://openalex.org/keywords/topic-modeling网站“,”display_name“:”Topic Modeling“,”score“:0.527043}],”concepts“:[{”id“:”https://openalex.org/C2776330181,“wikidata”:https://www.wikidata.org/wiki/Q18358244网址“,”display_name“:”Maximization“,”level“:2,”score“:0.6311868},{”id“:”https://openalex.org/C41008148,“wikidata”:https://www.wikidata.org/wiki/Q21198“,”display_name“:”计算机科学“,”level“:0,”score“:0.54516035},{”id“:”https://openalex.org/C2985793214,“wikidata”:https://www.wikidata.org/wiki/Q3274096“,”display_name“:”效用最大化“,”level“:2,”score“:0.41718778},{”id“:”https://openalex.org/C15744967,“wikidata”:https://www.wikidata.org/wiki/Q9418“,”display_name“:”心理学“,”等级“:0,”分数“:0.24616009},{”id“:”https://openalex.org/C162324750,“wikidata”:https://www.wikidata.org/wiki/Q8134“,”display_name“:”经济学“,”level“:0,”score“:0.18273026},{”id“:”https://openalex.org/C144237770,“wikidata”:https://www.wikidata.org/wiki/Q747534“,”display_name“:”数理经济学“,”level“:1,”score“:0.103019},{”id“:”https://openalex.org/C77805123,“wikidata”:https://www.wikidata.org/wiki/Q161272“,”display_name“:”社会心理学“,”level“:1,”score“:0.08080712}],”mesh“:[],”locations_count“:1.”locations“:[{”is_oa“:true,”landing_page_url“:”https://arxiv.org/abs/2402.14688,“pdf_url”:https://arxiv.org/pdf/2402.14688,“源”:{“id”:https://openalex.org/S4306400194“,”display_name“:”arXiv(康奈尔大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is_in_doaj“:false,”is_core“:false,”host_organization“:”https://openalex.org/I205783295“,”“host_organization_name”:“康奈尔大学”,“host_organization_lineage”:[“https://openalex.org/I205783295“],”host_organization_lineage_names“:[”康奈尔大学“],“type”:“repository”},“license”:null,“licence_id”:null,“version”:“submittedVersion”,“is_accepted”:false,“is_published”:false}],“best_oa_location”:{“is_oa”:true,“landing_page_url”:“https://arxiv.org/abs/2402.14688,“pdf_url”:https://arxiv.org/pdf/2402.14688,“源”:{“id”:https://openalex.org/S4306400194“,”display_name“:”arXiv(康奈尔大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is_in_doaj“:false,”is_core“:false,”host_organization“:”https://openalex.org/I205783295“,”“host_organization_name”:“康奈尔大学”,“host_organization_lineage”:[“https://openalex.org/I205783295“],”host_organization_lineage_names“:[”康奈尔大学“],“type”:“repository”},“license”:null,“licence_id”:null,“version”:“submittedVersion”,“is_accepted”:false,“is_published”:false},”sustainable_development_goals“:[],“grants”:[]https://openalex.org/W4388998647","https://openalex.org/W4380149910","https://openalex.org/W4296628518","https://openalex.org/W3121521771","https://openalex.org/W30823005","https://openalex.org/W2741533395","https://openalex.org/W2490250203","https://openalex.org/W2393902186","https://openalex.org/W2267645079","https://openalex.org/W2118922860“],”abstract_inverted_index“:{”We“:[0,67],”present“:[1],”an“:[2150],”approach“:[3],”called“:[4],”Q-probing“:[5,22],”to“:[6,12,48,63,76157],”adapt“:[7],”a“:[8,14,19,50,55,77,99142],”预训练“:[9],”language“:[10],”model“:[11],”maximize“:[13],”task-specific“:[15],“奖励”:[16,96],“功能”:[17],“地点”:[18],“高”:[20],“等级”,:[21],“座位”:[23]介于“:[24],“较重”:[25],“接近”:[26,32],“如”:[27,33],“例如”:[28,34,83126128],“微调”:[29137],“和”:[30159],“较轻”:[31],“少”:[35],“射门”:[36],“提示”,“:[37],”:[38],“能”:[39,60144],“也”:[40],“be”:[41,61145],“组合”:[42],“with”:[43121],“两者都可以。“:[44],“The”:[45],“idea”:[46],“is”:[47,74],“learn”:[49],“simple”:[51],“linear”:[52],“function”:[53],“on”:[54108147],“model’s”:[56],“embedding”:[57],“space”:[58],“that”:[59,70],“used”:[62],“reweight”:[64],“candidated”:[65],“completions”。“:[66],”理论“:[68],”显示“:[69],”此“:[71114],”采样“:[72158],”程序“:[73],”等效“:[75],”KL约束“:[78],”最大化“:[79],”of“:[80,86101149],”the“:[81,84,91],”Q-probe“:[82143],”number“:[85],”samples“:[87],”增加。“:[88],“To”:[89],“train”:[90],“Q-probes”:[92],“we”:[C3116],“consider”:[94],“either”:/95],“modeling”:[97],“or”:[98],“class”:[100],“novel”:[102],“direct”:[103],“policy”:[104111],“learning”:[105],“objectives”:[106],“based”:[107],“importance”:[109],“加权”:[110],“梯度。“:[112],”With“:[113],”technology“:[115],”see“:[117],”gains“:[118],”in“:[119138],”domains“:[120],”ground-truth“:/122],”rewards“:/123130],”(code“:[124],”generation)“:[125],”well“:[127],”implicit“:[129],”defined“:[131],”by“:[132],”preference“:%133],”data,“:[134],”even“:[135],“跑赢大市”:[136],“数据限制”:[139],“政体”。“:[140],”“此外,”:[141],“训练有素”:[146],“顶部”:[148],“API”:[151],“自”:[152],“it”:[153],“仅”:[154],“假设”:[155],“访问”:[156],“嵌入。“:[160],”代码:“:[161],”https://github.com/likenneth/q_probe网站":[162],".“:[163]},”cited_by_api_url“:”https://api.openalex.org/works?filter=cites:W4392121074“,”counts_by_year“:[],”updated_date“:”2024-09-16T09:45:25.456463“,”created_date”:“2024-02-24”}