{“id”:“https://openalex.org/W2104830490“,”doi“:”https://doi.org/10.109/3468.618258“,”title“:“用于马尔可夫链自适应控制的强化学习神经网络”,“display_name”:“用于自适应控制马尔可夫链条的强化学习神经元网络”,”publication_year“:1997,”publitation_date“:”1997-01-01“,”ids“:{”openalex“:”https://openalex.org/W2104830490“,”doi“:”https://doi.org/10.109/3468.618258“,”mag“:”2104830490“},”language“:”en“,”primary_location“:{”is_oa“:false,”landing_page_url“:”https://doi.org/10.109/3468.618258“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S4210201610网址“,”“display_name”:“IEEE系统人与控制论事务处理-A部分系统与人类”,“issn_l”:“1083-4427”,“isn”:[“1083-4428”,“1558-2426”],“is_oa”:false,“is_ in_doaj”:false,“is_core”:true,“host_organization”:“https://openalex.org/P4310318808“,”“host_organization_name”:“电气与电子工程师学会”,“host_ordanization_lineage”:[“https://openalex.org/P4310318808“],”host_organization_lineage_names“:[”电气与电子工程师协会“],“type”:“journal”},“license”:null,“licence_id”:null,“version”:nully,“is_accepted”:false,“is_published”:false},”type“:”article“,”type_crossref“:“jornal-article”,“indexed_in”:[”crossref“]any_repository_has_fulltext“:false},”作者身份“:[{”作者位置“:”第一个“,”作者“:{”id“:”https://openalex.org/A5016932375“,”display_name“:”G.Santharam“,”orcid“:null},”institutions“:[{”id“:”https://openalex.org/I1333370159“,”display_name“:”摩托罗拉(美国)“,”ror“:”https://ror.org/01hafxd32“,”country_code“:”US“,”type“:“company”,”lineage“:[”https://openalex.org/I1333370159“]],”国家“:[”美国“],”is_corresponding“:false,”raw_author_name“:”G.Santharam“,”raw_affiation_strings“:[”摩托罗拉印度电子公司,印度班加罗尔“],”附属机构“:[{”raw_affiation_string“:”摩托罗拉印度电子公司,印度班加罗尔“,”institution_ids“:[”https://openalex.org/I1333370159“]}]},{”author_position“:”last“,”author“:{”id“:”https://openalex.org/A5029280918“,”display_name“:”P.S.Sastry“,”orcid“:”https://orcid.org/0000-0001-7863-8088“},”机构“:[],”国家“:[[],”is_corresponding“:false,”raw_author_name“:P.S.Sastry”,“raw_affiliation_strings”:[]has_fulltext“:true,”fulltext_origin“:”ngrams“,”cited_by_count“:18,”citation_normalized_percentile“:{”value“:0.708211,”is_in_top_1_percent“:false,”is_ in_top_ 10_percents“:false},”citted_by_percentile_year“:{”min“:83,”max“:84},“biblio”:{“volume”:“27”,“issue”:“5”,“first_page”:“588”,“last_page”:”“600”},:false,“is_paratext”:false,“primary_topic”:{“id”:“https://openalex.org/T12794“,”“display_name”:“最优控制的自适应动态规划”,“score”:0.9987,“subfield”:{“id”:“https://openalex.org/subfields/1703“,”display_name“:”计算理论与数学“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},”主题“:[{”id“:”https://openalex.org/T12794“,”“display_name”:“最优控制的自适应动态规划”,“score”:0.9987,“subfield”:{“id”:“https://openalex.org/subfields/1703“,”display_name“:”计算理论与数学“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},{”id“:”https://openalex.org/T10462“,”display_name“:”强化学习算法“,”score“:0.9976,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},{”id“:”https://openalex.org/T10320“,”display_name“:”神经网络基础与应用“,”score“:0.9898,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}}],”关键词“:[{”id“:”https://openalex.org/keywords/stoachamic-game(https://openalex.org/关键词/随机游戏)“,”display_name“:”随机游戏“,”score“:0.7903746},{”id“:”https://openalex.org/keywords/q-learning网站“,”display_name“:”Q-learning“,”score“:0.6180101},{”id“:”https://openalex.org/keywords/reinforcement-learning网站“,”display_name“:”强化学习“,”score“:0.615079},{”id“:”https://openalex.org/keywords/adaptive-dynamic编程“,”display_name“:”自适应动态编程“,”score“:0.562374},{”id“:”https://openalex.org/keywords/optimal-control(https://openalex.org/关键词/最优控制)“,”display_name“:”最优控制“,”score“:0.540676}],”concepts“:[{”id“:”https://openalex.org/C97541855,“wikidata”:https://www.wikidata.org/wiki/Q830687网址“,”display_name“:”强化学习“,”level“:2,”score“:0.81217873},{”id“:”https://openalex.org/C22171661,“wikidata”:https://www.wikidata.org/wiki/Q1074380“,”display_name“:”随机游戏“,”level“:2,”score“:0.7903746},{”id“:”https://openalex.org/C106189395,“wikidata”:https://www.wikidata.org/wiki/Q176789“,”display_name“:”Markov决策过程“,”level“:3,”score“:0.6657324},{”id“:”https://openalex.org/C50644808,“wikidata”:https://www.wikidata.org/wiki/Q192776“,”display_name“:”人工神经网络“,”level“:2,”score“:0.647013},{”id“:”https://openalex.org/C98763669,“wikidata”:https://www.wikidata.org/wiki/Q176645“,”display_name“:”Markov链“,”level“:2,”score“:0.64515436},{”id“:”https://openalex.org/C188116033,“wikidata”:https://www.wikidata.org/wiki/Q2664563“,”display_name“:”Q-learning“,”level“:3,”score“:0.6180101},{”id“:”https://openalex.org/C41008148,“wikidata”:https://www.wikidata.org/wiki/Q21198“,”display_name“:”计算机科学“,”level“:0,”score“:0.5784872},{”id“:”https://openalex.org/C2779343474,“wikidata”:https://www.wikidata.org/wiki/Q3109175“,”display_name“:”Context(考古学)“,”level“:2,”score“:0.5652359},{”id“:”https://openalex.org/C107464732,“wikidata”:https://www.wikidata.org/wiki/Q235781“,”display_name“:”自适应控制“,”level“:3,”score“:0.5209293},{”id“:”https://openalex.org/C126255220,“wikidata”:https://www.wikidata.org/wiki/Q141495“,”display_name“:”数学优化“,”level“:1,”score“:0.5055444},{”id“:”https://openalex.org/C203479927,“wikidata”:https://www.wikidata.org/wiki/Q5165939(网址:https://www.wikidata.org/wiki/Q5165939)“,”display_name“:”控制器(灌溉)“,”level“:2,”score“:0.49445343},{”id“:”https://openalex.org/C91575142,“wikidata”:https://www.wikidata.org/wiki/Q1971426“,”display_name“:”最优控制“,”level“:2,”score“:0.4509298},{”id“:”https://openalex.org/C159886148,“wikidata”:https://www.wikidata.org/wiki/Q176645“,”display_name“:”Markov过程“,”level“:2,”score“:0.42809588},{”id“:”https://openalex.org/C48103436,“wikidata”:https://www.wikidata.org/wiki/Q599031“,”display_name“:”State(computer science)“,”level“:2,”score“:0.41155007},{”id“:”https://openalex.org/C47446073,“wikidata”:https://www.wikidata.org/wiki/Q5165890“,”display_name“:”控制理论(社会学)“,”level“:3,”score“:0.38528386},{”id“:”https://openalex.org/C33923547,“wikidata”:https://www.wikidata.org/wiki/Q395“,”display_name“:”数学“,”等级“:0,”分数“:0.30794713},{”id“:”https://openalex.org/C2775924081,“wikidata”:https://www.wikidata.org/wiki/Q55608371“,”display_name“:”Control(management)“,”level“:2,”score“:0.30654567},{”id“:”https://openalex.org/C154945302,“wikidata”:https://www.wikidata.org/wiki/Q11660“,”display_name“:”人工智能“,”level“:1,”score“:0.25275075},{”id“:”https://openalex.org/C119857082,“wikidata”:https://www.wikidata.org/wiki/Q2539“,”display_name“:”机器学习“,”level“:1,”score“:0.18524647},{”id“:”https://openalex.org/C11413529,“wikidata”:https://www.wikidata.org/wiki/Q8366“,”display_name“:”Algorithm“,”level“:1,”score“:0.1360929},{”id“:”https://openalex.org/C144237770,“wikidata”:https://www.wikidata.org/wiki/Q747534“,”display_name“:”数理经济学“,”level“:1,”score“:0.079203695},{”id“:”https://openalex.org/C6557445,“wikidata”:https://www.wikidata.org/wiki/Q173113“,”display_name“:”农学“,”level“:1,”score“:0.0},{”id“:”https://openalex.org/C86803240,“wikidata”:https://www.wikidata.org/wiki/Q420“,”display_name“:”生物学“,”等级“:0,”分数“:0.0},{”id“:”https://openalex.org/C151730666,“wikidata”:https://www.wikidata.org/wiki/Q7205(网址:https://www.wikidata.org/wiki/Q7205)“,”display_name“:”古生物学“,”level“:1,”score“:0.0},{”id“:”https://openalex.org/C105795698,“wikidata”:https://www.wikidata.org/wiki/Q12483“,”display_name“:”Statistics“,”level“:1,”score“:0.0}],”mesh“:[],”locations_count“:1.”locations“:[{”is_oa“:false,”landing_page_url“:”https://doi.org/10.109/3468.618258“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S4210201610“,”“display_name”:“IEEE系统人与控制论事务处理-A部分系统与人类”,“issn_l”:“1083-4427”,“isn”:[“1083-4428”,“1558-2426”],“is_oa”:false,“is_ in_doaj”:false,“is_core”:true,“host_organization”:“https://openalex.org/P4310318808“,”“host_organization_name”:“电气与电子工程师学会”,“host_ordanization_lineage”:[“https://openalex.org/P4310318808“],”host_organization_lineage_names“:[”电气与电子工程师协会“],“type”:“journal”},“license”:null,“licence_id”:null',“version”:null,“is_accepted”:false,“is_published”:false}],“best_oa_location”:nul,“sustainable_development_goals”:[],“grants”:[],“datasets”:[/],“versions”:[]],“referenced_works_count”:23,“referrenced_works”:]”https://openalex.org/W1505136099","https://openalex.org/W1524028581","https://openalex.org/W1538558539","https://openalex.org/W1568229137","https://openalex.org/W1586172133","https://openalex.org/W1592648094","https://openalex.org/W1982997797","https://openalex.org/W1991513691","https://openalex.org/W2015667537","https://openalex.org/W2021801581","https://openalex.org/W2023745809","https://openalex.org/W2028145673","https://openalex.org/W2091565802","https://openalex.org/W2100677568","https://openalex.org/W2101130101","https://openalex.org/W2101927907","https://openalex.org/W2104830490","https://openalex.org/W2115447855","https://openalex.org/W2321292752","https://openalex.org/W3041202696","https://openalex.org/W32403112","https://openalex.org/W374079423","https://openalex.org/W4238000550“],”related_works“:[”https://openalex.org/W4380550992","https://openalex.org/W4322760752","https://openalex.org/W3167472281","https://openalex.org/W3096874164","https://openalex.org/W2970347269","https://openalex.org/W2808418668","https://openalex.org/W2472051997","https://openalex.org/W2357975469","https://openalex.org/W2146763310","https://openalex.org/W2101748387“],”abstract_inverted_index“:{”In“:[0,15],”this“:[1,16],”paper“:[2],”we“:[3,18],”consider“:[4],”the“:[5,20,52,58,69,74110121],”problem“:[6,21],”of“:[7,22,25,33,73,98120],”reinforcement“:[8],“learning”:[9,91,94],”In“:[10117],”a“:[11,30,79,89125],”dynamical“:[12],“变化”:[13],“环境”:[14],“上下文”:[17],“研究”:[19],“自适应“:[23104],”控制“:[24,81],”有限状态“:[26],”马尔可夫“:[27122],”链“:[28],”with“:[29124],”finited“:[31],”number“:[32],”控制。“:[34],”The“:[35,42,71],”transition“:[36],”and“:[37],”payoff“:[38,56],”structures“:[39],”are“:[40,83],”unknown。“:[41],”目标“:[43],”是“:[44,66107],”到“:[45],”查找“:[46],”an“:[47103114],”最佳“:[48115],”策略“:[49],”其中“:[50,77],”最大化“:[51],”期望“:[53],”总计“:[54],”折扣“:[55],”超过“:[57],”无限“:[59],”地平线。“:[60],”A“:[61],”随机“:[62],”神经“:[63,75],”网络“:[64],”模型“:[65],”建议“:[67],”用于“:[68],”控制器。“:[70],”parameters“:[72101],”net,“:[76],”determine“:[78],”random“:[80],”strategy“:[82],”updated“:[84],”at“:[85],”each“:[86118],”instant“:[87],”using“:[88102],”simple“:[90],”scheme。“:[92],”此“:[93],”方案“:[95],”涉及“:[96],”估计“:[97],”一些“:[99],”相关“:[100],”批评家。“:[105],”It“:[106],”proved“:[108],”that“:[109],”controller“:[111],”渐近“:[112],”chooses“:[113],”action“:[116],”state“:[119],”chain“:[123],”high“:[126],”概率。“:[127]},”cited_by_api_url“:”https://api.openalex.org/works?filter=cites:W2104830490“,”counts_by_year“:[{”年“:2023,”cited_by_count“:1}],”更新日期“:”2024-08-13T17:19:42.712549“,”创建日期“:“2016-06-24”}