｛“状态”：“正常”，“消息类型”：“工作”，“消息版本”：“1.0.0”，“消息”：｛“索引”：｛“日期部分”：[[2023,9,13]，“日期时间”：“2023-09-13T17:32:47Z”，“时间戳”：1694626367198｝，“引用计数”：26，“发布者”：“电气和电子工程师协会（IEEE）”，“问题”：“5”，“许可证”：[｛“开始”：｛“日期部分”：[[1997,11]，“日期时间”：“1997-01-01T00:00:00Z”，“timestamp”：852076800000}，“content-version”：“vor”，“delay-in-days”：0，“URL”：“https:\/\/ieeexplore.iee.org\/Xplorehelp\/downloads\/license-information\/ieee.html”}]，“content-domain”：{“domain”:[]，“crossmark-restriction”：false}，”shortcontainer-title“：[”ieee Trans.Syst.，Man，Cybern.A“]，”publish ed-print“：{”日期-部分“：[1997]]}，“DOI”：“10.1109\/3468.618258”，“type”：“journal-article”，“created”：{“date-parts”：[[2002,8,24]]，“date-time”：“2002-08-24T19:45:59Z”，“timestamp”：1030218359000}，“page”：《588-600》，“source”：，“author”：[{“given”：“G.”，“family”：“Santharam”，“sequence”：“first”，“affiliation”：[]}，{“Givent”：“P.S.”，”family“：”Sastry“，”sequence“：”additional“，”affiliation:[]}]，“member”：“263”，“reference”：[[{”key“：”ref10“，”author“：”williams“，”“year”：“1993”，“journal title”：“分析政策迭代的一些增量变体理解actor-critic学习系统的第一步”}，{“key”：“ref11”，“author”：“howard”，“year”：“1960”，“journal-title”：“Dynamic Programming and Markov Processes”}、{“密钥”：“ref12”，“doi-asserted-by”：“publisher”，《doi》：“10.1137\/0323023”}，“DOI”：“10.1109\/TAC.1982.1103017”}，{“key”：“ref14”，“DOI-asserted-by”：“publisher”，“DOI:”10.1109\/21.21595“}，“key“：”ref15“，”DOI-assert-by“：”publisher“，”DOI“：”10.1109\/TAC.196.1104342“}”，{《key》：“ref16”，“article-title”：“关于强化学习的计算经济学”，“author”：“barto”，“year”：“1990年”，“新闻标题”：“Proc 1990 Connectionism Models暑期学校”}，{“key”：“ref17”，“author”：“barto”，“year”：“1991”，“journal-title”：“使用异步动态编程的实时学习和控制”}{“键”：“ref4”，“doi-asserted-by”：“publisher”，“doi”：“10.1109\/TSMC.1987.289334”}，{“key”：“ref3”，“author”：“ross”，“year”：“1970”，“journal-title”：“带优化应用的应用概率模型”}：“ref5”，“doi-asserted-by”：“publisher”，“doi”：“10.1109\/TSMC.1985.6313371”}，{“key”：“ref8”，“author”：“phansalkar”，“year”：“1991”，“journal-title”：“联结主义系统的学习自动机算法&#x2014局部和全局收敛”}，{“key”：“ref2”，“doi-asserted-by”：“publisher”，“doi”：“10.1109\/TSMC.1983.6313077”}，{“key”：“ref9”，“author”：“sutton”，“year”：“1984”，“journal-title”：“reinforcement learning中的临时学分分配”}：“beneveniste”，“年份”：“1987”，“journal-title”：“自适应算法和随机逼近”}，{“key”：“ref22”，“doi-asserted-by”：“crossref”，“author”：“rouche”，《年份》：“1977年”，“johnal-title:”Liapunov直接方法应用数学科学的稳定性理论-22“，“doi”：“10.1007\/9781-4684-9362-7”}：“随机过程的逼近和弱收敛方法”}，{“key”：“ref24”，“doi-asserted-by”：“publisher”，“doi”：“10.1007\/BF00115009”}：“199”，“DOI”：“10.1090\/trans2\/042\/13”，“article-title”：“右侧不连续的微分方程”，“volume”：“42”，“author”：“filippov”，“year”：“1968”，“journal-title“：”Amer Math-Soc Translations Series 2“}，{“key”：”ref25“，”author“：”borkar“，”year“：”1991“，”journal-title“：“数学系列240中受控马尔可夫链的主题皮特曼研究笔记”}]，“容器-时间”：[“IEEE系统、人和控制论事务-A部分：系统和人类”]，“原始标题”：[]，“链接”：[{“URL”：“http://\/xplorestagins.IEEE.org\/ielx1\/3468\/10061858.pdf？arnumber=618258”，“内容类型”：“未指定”，“content-version”：“vor”，“intended-application”：“similarity-checking”}]，“deposed”：{“date-parts”：[[2021,11,29]]，“date-time”：“2021-11-29T20:06:54Z”，“timestamp”：1638216414000}，“score”：1，“resource”：{“primary”：“{”URL：“http://ieeexplore.iee.org\/document\/61858\/”}}，”副标题：[]，“shorttitle”：[]，“已发布”：{“date-parts”：[1997]]}，“references-count”：26，“日志问题”：{“问题”：“5”}，“URL”：“http://\/dx.doi.org\/10.109\/3468.618258”，“关系”：{}，”ISSN“：[”1083-4427“]，”ISSN-type“：[{”value“：”1083-44287“，”type“:”print“}]，”subject“：【】，”published“：{”date-parts“：【1997】}}}