{“id”:“https://openalex.org/W1595271225“,”doi“:”https://doi.org/10.1007/978-3-642-22887-2_30“,”title“:“强化学习与贝叶斯控制规则”,”display_name“:”强化学习与贝氏控制规则“,”publication_year“:2011,”publiation_date“:”2011-01-01“,”ids“:{”openalex“:”https://openalex.org/W1595271225“,”doi“:”https://doi.org/10.1007/978-3-642-22887-2_30“,”mag“:”1595271225“},”language“:”en“,”primary_location“:{”is_oa“:false,”landing_page_url“:”https://doi.org/10.1007/978-3-642-22887-2_30“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S106296714“,”“display_name”“:”“计算机科学课堂讲稿”“,”issn_l“:”0302-9743“,”isn“:[”0302-7743“、”1611-3349“],”is_oa“:false,”is_in_doaj“:false,”host_organization“:”https://openalex.org/P4310318900“,”“host_organization_name”:“Springer Science+Business Media”,“host_organization_lineage”:[“https://openalex.org/P4310319965","https://openalex.org/P4310318900“],”host_organization_lineage_names“:[”Springer Nature“,”Springer-Science+Business Media“],“type”:“book-series”},“license”:null,“licence_id”:nul,“version”:null,“is_accepted”:false,“is_published”:false},”type“:”book-chapter“,”type_crossref“:“book-chapter”,”indexed_in“:[“crossref”],”open_access“:{”is_oa“:false”“,”oa_url“:空,”any_repository_has_fulltext“:false},”作者身份“:[{”作者位置“:”第一个“,”作者“:{”id“:”https://openalex.org/A5035060247“,”display_name“:”Pedro A.Ortega“,”orcid“:null},”institutions“:[{”id“:”https://openalex.org/I241749“,”display_name“:”剑桥大学“,”ror“:”https://ror.org/013meh722“,”country_code“:”GB“,”type“:“教育”,”世系“:[”https://openalex.org/I241749“]}],”国家“:[”GB“],”is_corresponding“:false,”raw_author_name“:”Pedro Alejandro Ortega“,”raw _affiliation_strings“:[“英国剑桥大学工程系,剑桥,特朗平顿街,CB2 1PZ”],”affiliations“:”剑桥大学工程系,剑桥Trumpington Street,Cambridge,CB2 1PZ,UK“,”机构ID“:[”https://openalex.org/I241749“]}]},{”author_position“:”middle“,”author“:{”id“:”https://openalex.org/A500197565“,”display_name“:”Daniel A.Braun“,”orcid“:”https://orcid.org/0000-0002-8637-6652},“机构”:[{“id”:https://openalex.org/I241749“,”display_name“:”剑桥大学“,”ror“:”https://ror.org/013meh722“,”country_code“:”GB“,”type“:“教育”,”世系“:[”https://openalex.org/I241749“]}],”国家“:[”GB“],”is_corresponding“:false,”raw_author_name“:”Daniel Alexander Braun“,”raw _affiliation_strings“:[“英国剑桥大学工程系,剑桥,特朗平顿街,CB2 1PZ”],”affiliations“:”英国剑桥大学工程系,剑桥特朗平顿街,CB2 1PZ”,“institution_ids”:[“https://openalex.org/I241749“]}]},{”author_position“:”last“,”author“:{”id“:”https://openalex.org/A5020836373“,”display_name“:”Simon Godsill“,”orcid“:null},”institutions“:[{”id“:”https://openalex.org/I241749“,”display_name“:”剑桥大学“,”ror“:”https://ror.org/013meh722“,”country_code“:”GB“,”type“:“教育”,”世系“:[”https://openalex.org/I241749“]}],”countries“:[”GB“],”is_corresponding“:false,”raw_author_name“:”Simon Godsill“,”raw _affiliation_strings“:【”剑桥大学工程系,剑桥特朗平顿街,CB2 1PZ,英国“】,”affiliations“:[{”raw _affiliation_string“:”剑桥大学工程学系,剑桥,特朗平顿街,英国CB2 1P Z,“,”机构ID“:[”https://openalex.org/I241749“]}]}],”countries_distinct_count“:1,”institutions_disting_count”:1,“corresponding_author_ids”:[],”correspounding_institution_ids“:[]、”apc_list“:{“value”:5000,”currency“:”EUR“,”value_usd“:5392,”provenance“:”doaj“},”apc_payed“:{”value“:5000,“currencurrency”:“EUR”,”value_ usd“:5392,“provenance”:“doaj”},“has_fulltext”:false,“cited_by_count”:0,“cited_by_percentile_year“:{“min”:0,“max”:69},“biblio”:{”volume“:null,”issue“:nul,”first_page“:“281”,“last_page”:“285”},”is_retracted“:false,”is_paratext“:false,”primary_topic“:”{“id”:“https://openalex.org/T10462“,”display_name“:”强化学习算法“,”score“:0.9987,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},”主题“:[{”id“:”https://openalex.org/T10462“,”display_name“:”强化学习算法“,”score“:0.9987,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},{”id“:”https://openalex.org/T12101“,”“display_name”:“多臂盗贼问题的优化”,“score”:0.9952,“subfield”:{“id”:“https://openalex.org/subfields/1803“,”display_name“:”管理科学与运筹学“},”字段“:{”id“:”https://openalex.org/fields/18“,”display_name“:”Decision Sciences“},”domain“:{”id“:”https://openalex.org/domains/2“,”“display_name”:“社会科学”}},{”id“:”https://openalex.org/T12794“,”“display_name”:“最优控制的自适应动态规划”,“score”:0.9793,“subfield”:{“id”:“https://openalex.org/subfields/1703“,”display_name“:”计算理论与数学“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}}],”关键词“:[{”id“:”https://openalex.org/keywords/reinforcement-learning网站“,”display_name“:”强化学习“,”score“:0.630598},{”id“:”https://openalex.org/keywords/actor-critic-algorithm网站“,”display_name“:”参与者关键算法“,”score“:0.562341},{”id“:”https://openalex.org/keywords/adaptive-dynamic编程“,”display_name“:”自适应动态编程“,”score“:0.561337},{”id“:”https://openalex.org/keywords/bathand-optimization网站“,”display_name“:”Bandit Optimization“,”score“:0.557015},{”id“:”https://openalex.org/keywords/multi-gent-systems网站“,”display_name“:”Multi-Agent Systems“,”score“:0.521803}],”concepts“:[{”id“:”https://openalex.org/C97541855,“wikidata”:https://www.wikidata.org/wiki/Q830687“,”display_name“:”强化学习“,”level“:2,”score“:0.8616339},{”id“:”https://openalex.org/C41008148,“wikidata”:https://www.wikidata.org/wiki/Q21198“,”display_name“:”计算机科学“,”level“:0,”score“:0.7957767},{”id“:”https://openalex.org/C107673813,“wikidata”:https://www.wikidata.org/wiki/Q812534“,”display_name“:”贝叶斯概率“,”level“:2,”score“:0.5743195},{”id“:”https://openalex.org/C2777212361,“wikidata”:https://www.wikidata.org/wiki/Q5127848“,”display_name“:”类(哲学)“,”级别“:2,”分数“:0.5279414},{”id“:”https://openalex.org/C77618280,“wikidata”:https://www.wikidata.org/wiki/Q1155772“,”display_name“:”Scheme(数学)“,”level“:2,”score“:0.52232355},{”id“:”https://openalex.org/C154945302,“wikidata”:https://www.wikidata.org/wiki/Q11660“,”display_name“:”人工智能“,”level“:1,”score“:0.5186864},{”id“:”https://openalex.org/C2780586882,“wikidata”:https://www.wikidata.org/wiki/Q7520643“,”display_name“:”Simple(哲学)“,”level“:2,”score“:0.51737684},{”id“:”https://openalex.org/C107524782,“wikidata”:https://www.wikidata.org/wiki/Q40164“,”display_name“:”Lever“,”level“:2,”score“:0.5007074},{”id“:”https://openalex.org/C2775924081,“wikidata”:https://www.wikidata.org/wiki/Q55608371“,”display_name“:”Control(management)“,”level“:2,”score“:0.42861432},{”id“:”https://openalex.org/C126255220,“wikidata”:https://www.wikidata.org/wiki/Q141495“,”display_name“:”数学优化“,”level“:1,”score“:0.38123915},{”id“:”https://openalex.org/C33923547,“wikidata”:https://www.wikidata.org/wiki/Q395“,”display_name“:”数学“,”等级“:0,”分数“:0.12950248},{”id“:”https://openalex.org/C138885662,“wikidata”:https://www.wikidata.org/wiki/Q5891“,”display_name“:”哲学“,”等级“:0,”分数“:0.0},{”id“:”https://openalex.org/C121332964,“wikidata”:https://www.wikidata.org/wiki/Q413“,”display_name“:”物理“,”级别“:0,”分数“:0.0},{”id“:”https://openalex.org/C134306372,“wikidata”:https://www.wikidata.org/wiki/Q7754“,”display_name“:”数学分析“,”level“:1,”score“:0.0},{”id“:”https://openalex.org/C111472728,“wikidata”:https://www.wikidata.org/wiki/Q9471“,”display_name“:”认识论“,”level“:1,”score“:0.0},{”id“:”https://openalex.org/C62520636,“wikidata”:https://www.wikidata.org/wiki/Q944“,”display_name“:”量子力学“,”level“:1,”score“:0.0}],”mesh“:[],”locations_count“:1.”locations“:[{”is_oa“:false,”landing_page_url“:”https://doi.org/10.1007/978-3-642-22887-2_30“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S106296714“,”“display_name”“:”“计算机科学课堂讲稿”“,”issn_l“:”0302-9743“,”isn“:[”0302-7743“、”1611-3349“],”is_oa“:false,”is_in_doaj“:false,”host_organization“:”https://openalex.org/P4310318900“,”“host_organization_name”:“Springer Science+Business Media”,“host_organization_lineage”:[“https://openalex.org/P4310319965","https://openalex.org/P4310318900“],”host_organization_lineage_names“:[”Springer Nature“,”Springer Science+Business Media“],”type“:”系列丛书“},”license“:null,”license_id“:null,”版本“:null,”is_accepted“:false,”is_published“:false}],”best_oa_location“:null,”可持续发展目标“:[],”授予“:[],”数据集“:[],”版本“:[],”referenced_works_count“:8,”referenced_works“:[”https://openalex.org/W1663973292","https://openalex.org/W2091565802","https://openalex.org/W2091592811","https://openalex.org/W2142176084","https://openalex.org/W2164569010","https://openalex.org/W4214717370","https://openalex.org/W4246270964","https://openalex.org/W4307347247“],”related_works“:[”https://openalex.org/W809494831","https://openalex.org/W4379985877","https://openalex.org/W4250682053","https://openalex.org/W3208483585","https://openalex.org/W2996939057","https://openalex.org/W2362409171","https://openalex.org/W2341184654网址","https://openalex.org/W2322242318","https://openalex.org/W2319849381","https://openalex.org/W2069464471“],”ngrams_url“:”https://api.openalex.org/works/W1595271225/ngrams网站“,”abstract_inverted_index“:{”We“:[0],”present“:[1],”an“:[2,27,57,77],”actor-critic“:[3],”scheme“:[4],”for“:[5,42],”reinforction“:[6],”learning“:[7],”in“:[8],”complex“:[9,46],”domains.“:[10],”The“:[11],”main“:[12],”contribution“:%13],”is“:[14,62,81],”to“:[15,32,55,66,85],”show“:[16],”that“:[17,26,61,80],”planning“:[18,29],”and“:[19],”I/O“:[20],”动力学”:[21],“可以”:[22],“被”:[23],“分离”:[24],“这样”:[25],“棘手”:[28],“问题”:[30],“减少”:[31],“a”:[33,43,67,86],“简单”:[34],“多武装”:[35],“土匪”:[36,59,72],“问题”:[37],“其中”:[38],“每个”:[39],“杠杆”:[40],“支架”:[41],“潜在”:[44],“任意”:[45],“政策。“:[47],”“此外,”:[48],“我们”:[49],“使用”:[50],“the”:[51],“Bayesian”:[52],“control”:[53],“rule”:[54],“construct”:55],“adaptive”:58,78],“player”:60],“universal”:[63,82],“with”:【64,83】,“respect”:[65,84],“givent”:[68,87],“class”:[69,88],“of”:[70,89],“最优”:[71],“玩家”:[73],“因此”:[74],“间接”:[75],“构造”:[76],“代理人”:[79],“政策。“:[90]},”cited_by_api_url“:”https://api.openalex.org/works?filter=cites:W1595271225“,”counts_by_year“:[],”updated_date“:”2024-06-14T14:36:11.147861“,”创建日期“:”2016-06-24“}