{“id”:“https://openalex.org/W2154023516“,”doi“:”https://doi.org/10.1007/s10994-012-5313-8“,”title“:“基于偏好的强化学习:一个正式框架和一个策略迭代算法”,”display_name“:”基于偏好的加强学习:一种正式框架和策略迭代算法“,”publication_year“:2012,”publiation_date“:”2012-08-10“,”ids“:{”openalex“:”https://openalex.org/W2154023516“,”doi“:”https://doi.org/10.1007/s10994-012-5313-8“,”mag“:”2154023516“},”language“:”en“,”primary_location“:{”is_oa“:true,”landing_page_url“:”https://doi.org/10.1007/st0994-012-5313-8,“pdf_url”:https://link.springer.com/content/pdf/10.1007%2Fs10994-012-5313-8.pdf“,”源“:{”id“:”https://openalex.org/S62148650“,”display_name“:”机器学习“,”issn_l“:”0885-6125“,”isn“:[”0885-6 125“,“1573-0565”],”is_oa“:false,”is_ in_doaj“:false,”host_organization“:”https://openalex.org/P4310318900“,”“host_organization_name”:“Springer Science+Business Media”,“host_organization_lineage”:[“https://openalex.org/P431031965","https://openalex.org/P4310318900“],”host_organization_lineage_names“:[”Springer Nature“,”Springer-Science+Business Media“],“type”:“journal”},“license”:null,“licence_id”:nul,“version”:“publishedVersion”,“is_accepted”:true,“is_published”:true},”type“:“article”,“type_crossref”:”journal-article“,”indexed_in“:[“crossref”],“open_access”:{“is_oa”:true,“oa_status”:“青铜色,“oa_url”:https://link.springer.com/content/pdf/10.1007%2Fs10994-012-5313-8.pdf“,”any_repository_has_fulltext“:false},”作者身份“:[{”作者位置“:”第一个“,”作者“:{”id“:”https://openalex.org/A5020009031“,”display_name“:”Johannes F\u00fcrnkranz“,”orcid“:”https://orcid.org/0000-0002-1207-0159},“机构”:[{“id”:https://openalex.org/I31512782“,”display_name“:”达姆施塔特科技大学“,”ror“:”https://ror.org/05n911h24“,”country_code“:”DE“,”type“:“教育”,”世系“:[”https://openalex.org/I31512782“]}],”国家“:[”DE“],”is_corresponding“:true,”raw_author_name“:”Johannes F\u00fcrnkranz“,”raw _ afiliation_strings“:[“德国达姆施塔特大学达姆施塔计算机科学系”],”affiliations“:[{”raw _affiliation_string“:”德国达姆斯塔特大学计算机科学系“,”institution_ids“:[https://openalex.org/I31512782“]}]},{”author_position“:”middle“,”author“:{”id“:”https://openalex.org/A5059439673“,”display_name“:”Eyke H\u00fcllermier“,”orcid“:”https://orcid.org/0000-0002-9944-4108},“机构”:[{“id”:https://openalex.org/I161103922“,”display_name“:”马尔堡菲利普斯大学“,”ror“:”https://ror.org/01rdrb571“,”country_code“:”DE“,”type“:“教育”,”世系“:[”https://openalex.org/I161103922“]}],”国家“:[”DE“],”is_corresponding“:false,”raw_author_name“:”Eyke H\u00fcllermier“,”raw_affiliation_strings“:[“德国马尔堡大学数学与计算机科学系”],”affiliations“:[{”raw_affiliation_string“:”德国马尔堡市马尔堡大学计算机科学系“,”机构ID“:[”https://openalex.org/I161103922“]}]},{”author_position“:”middle“,”author“:{”id“:”https://openalex.org/A5043737358“,”display_name“:”Weiwei Cheng“,”orcid“:”https://orcid.org/0000-0002-3381-4188},“机构”:[{“id”:https://openalex.org/I161103922“,”display_name“:”马尔堡菲利普斯大学“,”ror“:”https://ror.org/01rdrb571“,”country_code“:”DE“,”type“:“教育”,”世系“:[”https://openalex.org/I161103922“]}],”countries“:[”DE“],”is_corresponding“:false,”raw_author_name“:”Weiwei Cheng“,”raw _affiliation_strings“:【”德国马尔堡大学数学与计算机科学系“】,”affiliations“:[{”raw _affiliation_string“:”德国马尔伯格大学数学与计算科学系“,”institution_ids“:[“https://openalex.org/I161103922“]}]},{”author_position“:”last“,”author“:{”id“:”https://openalex.org/A5021770384“,”display_name“:”Sang-Hyeun Park“,”orcid“:null},”institutions“:[{”id“:”https://openalex.org/I31512782“,”display_name“:”达姆施塔特科技大学“,”ror“:”https://ror.org/05n911h24“,”country_code“:”DE“,”type“:“教育”,”世系“:[”https://openalex.org/I31512782“]}],”countries“:[”DE“],”is_corresponding“:false,”raw_author_name“:”Sang Hyeun Park“,”raw_affiation_strings“:[”德国达姆施塔特大学计算机科学系“],”affiliations“:[{”raw_affiation_string“:”德国达姆施塔特大学计算机科学系“,”institution_ids“:[”https://openalex.org/I31512782“]}]}],”countries_disticont_count“:1,”institutions_disticent_count”:2,”corresponding_author_ids“:[”https://openalex.org/A5020009031“],”对应的机构ID“:[”https://openalex.org/I31512782“],”apc_list“:{”value“:2390,”currency“:”EUR“,”value_usd“:2990,”provenance“:”doaj“},”apc _payd“:null,”fwci“:8.575,”has_fulltext“:false,”cited_by_count“:91,”citecd_by_percentile_year“:”{“min”:97,“max”:98},“biblio”:{“volume”:“89”,“issue”:“1-2”,“first_page”:“123”,“last_page”:“156”},“is_retracted”:false,“is_paratext”:false,“primary_topic”:{“id”:“https://openalex.org/T10462“,”display_name“:”强化学习算法“,”score“:0.9963,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},”主题“:[{”id“:”https://openalex.org/T10462“,”display_name“:”强化学习算法“,”score“:0.9963,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},{”id“:”https://openalex.org/T11178“,”display_name“:”G蛋白偶联受体的结构和功能“,”score“:0.9913,”subfield“:{”id“:”https://openalex.org/subfields/1312“,”display_name“:”分子生物学“},”字段“:{”id“:”https://openalex.org/fields/13“,”“display_name”:“生物化学、遗传学和分子生物学”},“域”:{“id”:“https://openalex.org/domains/1“,”display_name“:”生命科学“}},{”id“:”https://openalex.org/T12761“,”“display_name”:“适应数据流中的概念漂移”,“score”:0.9819,“subfield”:{“id”:“https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}}],”关键词“:[{”id“:”https://openalex.org/keywords/reinforcement-learning网站“,”display_name“:”强化学习“,”score“:0.651919},{”id“:”https://openalex.org/keywords/incremental-learning网站“,”display_name“:”增量学习“,”score“:0.563639},{”id“:”https://openalex.org/keywords/ensembly-learning“,”display_name“:”合奏学习“,”score“:0.534874},{”id“:”https://openalex.org/keywords/biased-signaling(https://openalex.org/keywords/biased-signaling)“,”display_name“:”偏置信号“,”score“:0.518638},{”id“:”https://openalex.org/keywords/adaptive-algorithms网站“,”display_name“:”自适应算法“,”score“:0.511393}],”concepts“:[{”id“:”https://openalex.org/C97541855,“wikidata”:https://www.wikidata.org/wiki/Q830687“,”display_name“:”强化学习“,”level“:2,”score“:0.8650584},{”id“:”https://openalex.org/C177148314,“wikidata”:https://www.wikidata.org/wiki/Q170084“,”display_name“:”Generalization“,”level“:2,”score“:0.7464553},{”id“:”https://openalex.org/C2781249084,“wikidata”:https://www.wikidata.org/wiki/Q908656“,”display_name“:”首选项“,”级别“:2,”分数“:0.73269737},{”id“:”https://openalex.org/C181204326,“wikidata”:https://www.wikidata.org/wiki/Q7239820“,”display_name“:”偏好学习“,”level“:3,”score“:0.71138704},{”id“:”https://openalex.org/C41008148,“wikidata”:https://www.wikidata.org/wiki/Q21198“,”display_name“:”计算机科学“,”level“:0,”score“:0.68432623},{”id“:”https://openalex.org/C189430467,“wikidata”:https://www.wikidata.org/wiki/Q7293293“,”display_name“:”排名(信息检索)“,”级别“:2,”分数“:0.6066848},{”id“:”https://openalex.org/C154945302,“wikidata”:https://www.wikidata.org/wiki/Q11660“,”display_name“:”人工智能“,”level“:1,”score“:0.6046994},{”id“:”https://openalex.org/C119857082,“wikidata”:https://www.wikidata.org/wiki/Q2539“,”display_name“:”机器学习“,”level“:1,”score“:0.5745514},{”id“:”https://openalex.org/C111696304,“wikidata”:https://www.wikidata.org/wiki/Q2303697“,”display_name“:”Sorting“,”level“:2,”score“:0.50528103},{”id“:”https://openalex.org/C2777868144,“wikidata”:https://www.wikidata.org/wiki/Q7239817“,”display_name“:”偏好启发“,”level“:3,”score“:0.4280116},{”id“:”https://openalex.org/C11413529,“wikidata”:https://www.wikidata.org/wiki/Q8366“,”display_name“:”Algorithm“,”level“:1,”score“:0.26889023},{”id“:”https://openalex.org/C33923547,“wikidata”:https://www.wikidata.org/wiki/Q395“,”display_name“:”数学“,”等级“:0,”分数“:0.21561933},{”id“:”https://openalex.org/C134306372,“wikidata”:https://www.wikidata.org/wiki/Q7754“,”display_name“:”数学分析“,”level“:1,”score“:0.0},{”id“:”https://openalex.org/C105795698,“wikidata”:https://www.wikidata.org/wiki/Q12483“,”display_name“:”Statistics“,”level“:1,”score“:0.0}],”mesh“:[],”locations_count“:1.”locations“:[{”is_oa“:true,”landing_page_url“:”https://doi.org/10.1007/st0994-012-5313-8,“pdf_url”:https://link.springer.com/content/pdf/10.1007%2Fs10994-012-5313-8.pdf“,”源“:{”id“:”https://openalex.org/S62148650“,”display_name“:”机器学习“,”issn_l“:”0885-6125“,”isn“:[”0885-6 125“,“1573-0565”],”is_oa“:false,”is_ in_doaj“:false,”host_organization“:”https://openalex.org/P4310318900“,”“host_organization_name”:“Springer Science+Business Media”,“host_organization_lineage”:[“https://openalex.org/P431031965","https://openalex.org/P4310318900“],”host_organization_lineage_names“:[”Springer Nature“,”Springer-Science+Business Media“],“type”:“journal”},“license”:null,“licence_id”:nul,“version”:“publishedVersion”,“is_accepted”:true,“is_published”:true}],“best_oa_location”:{“is_oa”:true,“landing_page_url”:“https://doi.org/10.1007/s10994-012-5313-8,“pdf_url”:https://link.springer.com/content/pdf/10.1007%2Fs10994-012-5313-8.pdf“,”源“:{”id“:”https://openalex.org/S62148650“,”display_name“:”机器学习“,”issn_l“:”0885-6125“,”isn“:[”0885-6 125“,“1573-0565”],”is_oa“:false,”is_ in_doaj“:false,”host_organization“:”https://openalex.org/P4310318900“,”“host_organization_name”:“Springer Science+Business Media”,“host_organization_lineage”:[“https://openalex.org/P431031965","https://openalex.org/P4310318900“],”host_organization_lineage_names“:[”Springer Nature“,”Springer-Science+Business Media“],“type”:“journal”},“license”:null,“licence_id”:nul,“version”:“publishedVersion”,“is_accepted”:true,“is_published”:true},”sustainable_development_goals“:[],”grants“:【】,”datasets“:〔】,”versions“:【〕,”referenced_works_count“:45,”https://openalex.org/W122021961","https://openalex.org/W1574700590","https://openalex.org/W1680797894","https://openalex.org/W1970789124","https://openalex.org/W1995672065","https://openalex.org/W2000759616","https://openalex.org/W2006492951","https://openalex.org/W2008763786","https://openalex.org/W2009303086","https://openalex.org/W2012392077","https://openalex.org/W2014932765","https://openalex.org/W2034191121","https://openalex.org/W2085102928","https://openalex.org/W2094387729","https://openalex.org/W2099001564","https://openalex.org/W2102705755","https://openalex.org/W2109169869","https://openalex.org/W2112420033","https://openalex.org/W2116661285","https://openalex.org/W2119567691","https://openalex.org/W2124122367","https://openalex.org/W2125612430","https://openalex.org/W2125922627","https://openalex.org/W2129297552","https://openalex.org/W2131490088","https://openalex.org/W2133632477","https://openalex.org/W2133990480","https://openalex.org/W2143280922","https://openalex.org/W2150821861","https://openalex.org/W2168319199","https://openalex.org/W2168405694","https://openalex.org/W2172968643","https://openalex.org/W2182044576","https://openalex.org/W2293743194","https://openalex.org/W2591957553","https://openalex.org/W2610184409","https://openalex.org/W2613433911","https://openalex.org/W2616052791","https://openalex.org/W2624698084","https://openalex.org/W2699476589","https://openalex.org/W2997998108","https://openalex.org/W3020831056","https://openalex.org/W4241387086","https://openalex.org/W4251324078","https://openalex.org/W4299828299“],”related_works“:[”https://openalex.org/W4386241784","https://openalex.org/W4286900255","https://openalex.org/W4200207182","https://openalex.org/W3210700034","https://openalex.org/W2914800632","https://openalex.org/W2766157851","https://openalex.org/W2157910771","https://openalex.org/W2126528747","https://openalex.org/W1562775108","https://openalex.org/W1488237461“],”ngrams_url“:”https://api.openalex.org/works/W2154023516/ngrams网站“,”abstract_inverted_index“:{”This“:[0],”paper“:[1],”makes“:[2],”a“:[3,26,88,98166172224],”first“:[4173],”step“:[5],”towards“:[6],”the“:[7,33,55,80,91107131210],”integration“:[8],”of“:[9,12,57,9010616817621222232235244],“two”:[10245],“subfields”:[11],“machine”:[13],“学习”:[14,69124219],“即”:[15],“偏好”:[16123228],“学习”:[17,20,31159229],“和”:[18217],“强化”:[19,30,68],“(RL).”:[21],“安”:[22],“重要”:[23],“动机”:[24],“for”:[25,67122144158186197215],“基于偏好”:[27236],“方法”:[28206],“to”:[29,53129151],“is”:[32103128207],“观察”:[34],“that”:[35142179189],“in”:[36,51,70,95],“many”:[37],“realveal”:[38],“域”,:[39],“数值”:[40],“反馈”:[41],“信号”:[42,74],“是”:[43,48240],“不是”:[44],“容易”:[45],“可用”,“:[46],”或“:[47],“定义”:[49],“任意”:[50],“顺序”:[52100109],“满足”:[54],“需要”:[56],“常规”:[58,92],“RL”:[59,93132],“算法”“:[60],“相反”,:[61],“我们”:[62170191220],“提议”:[63],“一个”:[64194],“替代”:[65],“框架”:[66,83,94178],“其中”:[71,96],“定性”:[72135163],“奖励”:[73],“可以”:[75],“是”:[76,85],“直接”:[77],“使用”:[78],“由”:[79111242],“学习者”:[81],“The”:[82],“may”:[84],“viewed”:[86],“as”:[87139154156],“泛化”:[89216],“仅”:[97],“部分”:[99],“介于”:[101],“政策”:[102],“需要”:[104],“代替”:[105],“总计”:[108],“诱导”:[110],“他们”:[112],“各自”:[113],“预期”:[114],“长期”:[115],“奖励”:[116],“因此”:[117],“建筑”:[118],“上”:[119183193202209],“新颖”:[120],“方法”:[121214],”我们的“:[125],“一般”:[126],“目标”:[127],“装备”:[130],“代理”:[133],“with”:[134],“policy”:[136199218238],“模型”,:[137],“此类”:[138160],“排名”:[140],“函数”:[141],“允许”:[143],“排序”:[145],“其”:[146],“可用”:[147],“操作”:[148],“来自”:[149162],“最多”:[150],“最少”:[152],“有希望”:[153],“良好”:[155],“算法”:[157],“模式”:[161],“反馈。”:[164],“As”:[165],“proof”:[167],“concept”:[169],“realize”:[171],“简单”:[174],“实例化”:[175],“此”:[177205],“定义”:[180],“首选项”:[181],“基于”:[182201208],“实用程序”:[184],“观测”:[185],“轨迹”:[187],“到”:[188],“结束”:[190],“构建”:[192],“现有”:[195],“方法”:[196230],“近似”:[198237],“迭代”:[200239],“滚动输出”.“:[203],“While”:[204],“use”:[211222],“分类”:[213],“制造”:[221],“特定”:[225],“类型”:[226],“调用”:[231],“标签”:[232],“排名”:[233],“优点”:[234],“图解”:[241],“表示”:[243],“案例”:[246],“研究”:[247]},“引用_by_api_url”:“https://api.openalex.org/works?filter=cites:W2154023516“,”counts_by_year“:[{”年“:2024,”cited_by_count“:4},{”年份“:2023,”ciped_by_cunt“:5},”{“年份”:2022,“cited_by_count”:5},{“年”:2021,”cited_by_count:10},“年”2020,”citecd_by-count“:11},“年份”:2017,“引用_ by_count”:5},{“年份”:2016,“引用_by_counts”:8},}“年份“:2015,”引用_ by_count“:7},{“年份”:2014,“引用_ by-count”:4},}“年份“:2013,”引用_ by/count“:9}],”更新日期“:“2024-06-23T23:39:43.067567”,“创建日期”:“2016-06-24”}