{“id”:“https://openalex.org/W4396651278“,”doi“:”https://doi.org/10.48550/arxiv.2405.01511“,”title“:”D2PO:带响应评估模型的鉴别器引导DPO“,”display_name“:”D1PO:带反应评估模型的辨别器引导的DPO“。”publication_year“:2024,”publication_date“:”2024-05-02“,”ids“:{”openalex“:”https://openalex.org/W4396651278“,”doi“:”https://doi.org/10.48550/arxiv.2405.01511“},”language“:”en“,”primary_location“:{”is_oa“:true,”landing_page_url“:”https://arxiv.org/abs/2405.01511,“pdf_url”:https://arxiv.org/pdf/2405.01511,“源”:{“id”:https://openalex.org/S4306400194“,”display_name“:”arXiv(康奈尔大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is.in_doaj“:false,”is_core“:fase,”host_organization“:”https://openalex.org/I205783295“,”“host_organization_name”:“康奈尔大学”,“host_organization_lineage”:[“https://openalex.org/I205783295“],”host_organization_lineage_names“:[”Cornell University“],“type”:“repository”},“license”:null,“licence_id”:null,“version”:“submittedVersion”,“is_accepted”:false,“is_published”:false},”type“:”preprint“,”type_crossref“:“posted-content”,“indexed_in”:[”arxiv“],‘open_access’:{”is_oa“:true,”“oa_status”:“green”,“oa_url”:“https://arxiv.org/pdf/2405.01511“,”any_repository_has_fulltext“:true},”authorships“:[{”author_position“:”第一“,”作者“:{”id“:”https://openalex.org/A5096365567“,”display_name“:”Prasann Singhal“,”orcid“:null},”institutions“:[],”countries“:[],”is_corresponding“:false,”raw_author_name“:”Singhal,Prasan“,”raw _affiliation_strings“:],”afliations“:【】},{”author_position“:”middle“,”author“:{”id“:”https://openalex.org/A5096130908“,”display_name“:”Nathan Lambert“,”orcid“:null},”institutions“:[],”countries“:[],”is_corresponding“:false,”raw_author_name“:”Lambert,Nathan“,”raw _affiliation_strings“:],”affiliations“:[/]},{”author_position“:”middle“,”author“:{”id“:”https://openalex.org/A5043572737“,”display_name“:”Scott Niekum“,”orcid“:null},”institutions“:[],”countries“:[],”is_corresponding“:false,”raw_author_name“:”Niekum,Scott“,”raw _affiliation_strings“:],”affiliations“:[/]},{”author_position“:”middle“,”author“:{”id“:”https://openalex.org/A502127239“,”display_name“:”Tanya Goyal“,”orcid“:null},”institutions“:[],”countries“:[],”is_corresponding“:false,”raw_author_name“:”Goyal,Tanya.“,”raw _affiliation_strings“:],”affiliations“:【】},{”author_position“:”last“,”author“:{”id“:”https://openalex.org/A5015133105“,”display_name“:”Greg Durrett“,”orcid“:”https://orcid.org/0000-0002-7061-7298“},”机构“:[],”国家“:[[],”is_corresponding“:false,”raw_author_name“:”Durrett,Greg“,”raw_affiliation_strings“:[],“隶属关系”:[]}],“机构资产”:[]“:null,”fwci“:null,”has_fulltext“:false,”cited_by_count“:0,”citation_normalized_percentile“:{”value“:0.0,”is_in_top_1_percent“:false},”cited_by_percentile_year“:{“min”:0,“max”:87},“biblio”:{“volume”:null,“issue”:null,“first_page”:nully,“last_page”:null},‘is_retracted’:false _主题“:{”id“:”https://openalex.org/T11512“,”“display_name”:“高维数据中的异常检测”,“score”:0.8714,“subfield”:{“id”:“https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},”主题“:[{”id“:”https://openalex.org/T11512“,”“display_name”:“高维数据中的异常检测”,“score”:0.8714,“subfield”:{“id”:“https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}}],”关键词“:[{”id“:”https://openalex.org/keywords/discriminator网站“,”display_name“:”鉴别器“,”score“:0.9477631},{”id“:”https://openalex.org/keywords/outlier-detection(https://openalex.org/keywords/outlier-detection)“,”display_name“:”离群检测“,”score“:0.529033},{”id“:”https://openalex.org/keywords/dep-learning网站“,”display_name“:”深度学习“,”score“:0.513711},{”id“:”https://openalex.org/keywords/高维数据“,”display_name“:”高维数据“,”score“:0.510947}],”concepts“:[{”id“:”https://openalex.org/C2779803651,“wikidata”:https://www.wikidata.org/wiki/Q5282088“,”display_name“:”Discriminator“,”level“:3,”score“:0.9477631},{”id“:”https://openalex.org/C41008148,“wikidata”:https://www.wikidata.org/wiki/Q21198“,”display_name“:”计算机科学“,”level“:0,”score“:0.4530995},{”id“:”https://openalex.org/C76155785,“wikidata”:https://www.wikidata.org/wiki/Q418“,”display_name“:”Telecommunications“,”level“:1,”score“:0.06616405},{”id“:”https://openalex.org/C94915269,“wikidata”:https://www.wikidata.org/wiki/Q1834857“,”display_name“:”Detector“,”level“:2,”score“:0.0}],”mesh“:[],”locations_count“:1,”location“:[{”is_oa“:true,”landing_page_url“:”https://arxiv.org/abs/2405.01511,“pdf_url”:https://arxiv.org/pdf/2405.01511,“源”:{“id”:https://openalex.org/S4306400194“,”display_name“:”arXiv(康奈尔大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is.in_doaj“:false,”is_core“:fase,”host_organization“:”https://openalex.org/I205783295“,”“host_organization_name”:“康奈尔大学”,“host_organization_lineage”:[“https://openalex.org/I205783295“],”host_organization_lineage_names“:[”康奈尔大学“],“type”:“repository”},“license”:null,“licence_id”:null,“version”:“submittedVersion”,“is_accepted”:false,“is_published”:false}],“best_oa_location”:{“is_oa”:true,“landing_page_url”:“https://arxiv.org/abs/2405.01511,“pdf_url”:https://arxiv.org/pdf/2405.01511,“源”:{“id”:https://openalex.org/S4306400194“,”display_name“:”arXiv(康奈尔大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is.in_doaj“:false,”is_core“:fase,”host_organization“:”https://openalex.org/I205783295“,”“host_organization_name”:“康奈尔大学”,“host_organization_lineage”:[“https://openalex.org/I205783295“],”host_organization_lineage_names“:[”康奈尔大学“],“type”:“repository”},“license”:null,“licence_id”:null,“version”:“submittedVersion”,“is_accepted”:false,“is_published”:false},”sustainable_development_goals“:[],“grants”:[]https://openalex.org/W4391375266","https://openalex.org/W4380714744","https://openalex.org/W4319453655","https://openalex.org/W4293202849","https://openalex.org/W2964074194","https://openalex.org/W2748952813","https://openalex.org/W2387995142","https://openalex.org/W2089959425","https://openalex.org/W1980965563","https://openalex.org/W1489300767“],”abstract_inverted_index“:{”Varied“:[0],”approachs“:[1],”for“:[2,64103],”aligning“:[3],”language“:[4],”models“:[5],”have“:[6],”been“:[7],”proposed“:[8],”including“:[9116],”supervised“:[10],”fine-tuning“:[11],”RLHF“:[12],”and“:[13,32138171],”direct“:[14],“optimization”:[15],“方法”:[16],“例如”:[17],“如”:[18],“DPO.”:[19],“虽然”:[20],“DPO”:[21132],“has”:[22],,“rapidly”:[23],“gathered”:[24],“popularity”:[25],“due”:[26],“to”:[27,54,85,90,97127131],“its”:[208],“direct”:[29],“training”:[30163],“process”:[31],“competitive”:[33],“results”,“there”:[35,42],“is”:[36155159],“an”:[37,62],“打开”:[38],“问题”:[39],“的”:[40,46113143],“是否”:[41],“保持”:[43],“实用”:[44],“优点”:[45],“使用”:[47],“a”:[48,51,92111117175],“鉴别器”:[49],“喜欢”:[50],“奖励”:[52],“模型”:[53],“评估”:[55],“回应”:[56],“我们”:[57106],“提议”:[58],“D2PO”:[59],“辨别器引导”:[60],“DPO”:[61167],“方法”:[63109125],“the”:[65134164179],“online”:[66],“setting”:[67],“where”:【68】,“偏好”:[69],“是”:[70],“正在”:[71],“收集”:[72],“贯穿”:[73],“学习”:[74],“作为”:[75],“我们”:[76,80121148],“收藏”:[77],“黄金”:[78],“偏好”:[88],“但是”:[89],“区别”:[93],“回应”:[94],“评估”:[95],“模型”:[96],“银色标签”:[98],“偶数”:[99],“更多”:[100],“合成”:[101],“数据”:[102136145],“策略”:[104165180],“训练”:[105],“探索”:[107],“这个”:[108],“跨越”:[110],“集合”:[112],“多样”:[114],“任务”:[115],“现实”:[118],“聊天”:[119],“设置”:[120],“查找”:[122],“那个”:[123],“leads”:[126],“higher quality”:[128],“outputs”:[129],“compared”:[130],“with”:[133166],“same”:[135],“budget”,:[137],“greater”:[139],“efficiency”:[140],“in”:[141],“terms”:[142],“preference”:%144],“requirements”。:[146],“此外,”:[147],“show”:[149],“conditions”:[150],“under”:[151],“which”:[152],“silver”:%153],“labeling”:[154],“most”:[156160],“help:“:[157],”it“:[158],”effective“:[161],”when“:[162],“跑赢大市”:[168],“传统”:[169],“PPO”:[170],“收益”:[172],“来源”:[173178],“维持”:[174],“分离”:[176],“鉴别器”:[177],“模型”:[181]},“引用_by_api_url”:“https://api.openalex.org/works?filter=引用:W4396651278“,”counts_by_year“:[],”updated_date“:”2024-09-24T23:48:33.739323“,”created_date:“2024-05-05”}“