{“状态”:“确定”,“消息类型”:“工作”,“信息版本”:“1.0.0”,“邮件”:{“索引”:{-“日期-部件”:[[2024,2,28]],“日期-时间”:“2024-02-28T00:59:12Z”,“时间戳”:1709081952041},“引用-计数”:31,“发布者”:“爱思唯尔BV”,“许可证”:[{“开始”:{-date-parts”:[[2023,9,1]],《日期-时间》:“2023-09-01T00:00:00Z“,”timestamp“:1693526400000},”content-version“:“tdm”,“delay-in-days”:0,“URL”:“https:\/\/www.elsevier.com/tdm\/userlicense\/1.0\/”},{“start”:{“date-parts”:[[2024,5,30]],“date-time”:“2024-05-30T00:00:00Z”,“timestamp”:1717027200000},“content-version”:“am”,“delay-in-days”:272,“URL”:“http://www.elseviers.com/open-access\/userlicense”\/1.0\/“},{“开始”:{“日期部分”:[[2023,9,1]],“日期时间”:“2023-09-01T00:00:00Z”,“timestamp”:1693526400000},“content-version”:“stm-asf”,“delay-in-days”:0,“URL”:“https:\/\/doi.org\/10.15223\/policy-017”},{“start”:{“date-parts”:[[2023,9,1]],“date-time”:“2023-09-01T00:00Z”,delay-in-days“:0,”URL“:”https:\/\/doi.org\/10.15223\/policy-037“},{“start”:{“date-parts”:[[2023,9,1]],“date-time”:“2023-09-01T00:00:00Z”,“timestamp”:1693526400000},“content-version”:“stm-asf”,“delay-in-days”:0,“URL”:“https:\/\/doi.org\/10.15223\/policy-012”},{“start”:{(日期-部件):[[2023,9,1],“日期-时间”:“2020 3-09-01T_00:00:0Z”,”“timestamp:”1693526400000},“content-version”:“stm-asf”,“delay-in-days”:0,“URL”:“https:\/\/doi.org\/10.15223\/policy-029”},{“start”:{“date-parts”:[[2023,9,1]],“date-time”:“2023-09-01T00:00:00Z”,“timestamp”:1693526400000},“content-version”:“stm-asf”,“delay-in-days”:0,“URL”:“http:\/\/doi.org\/10.15223\/policy-004”}],“content-domain”:{“domain”:[“els”evier.com“,”sciencedirect.com“],”crossmark-restriction“:true},“short-container-title”:[“计算物理杂志”],“published-print”:{“date-parts”:[[2023,9]]},“DOI”:“10.1016\/j.jcp.2023.112238”,“type”:“Journal-article”,“created”:{“date-ports”:[[2023,5,19]],“date-time”:“2023-05-19T15:13:28Z”,“timestamp”:16845092080000},”page:“112238“,”update-policy“http://\”/dx.DOI.org \/10.1016 \/elsevier_cm_policy“,“source”:“Crossref”,“is-referenced-by-count”:1,“title”:[“参数化环境下强化学习的随机最大原理方法”],“前缀”:“10.1016”,“卷”:“488”,“作者”:[{“给定”:“Richard”,“家族”:“Archibald”,”sequence“:”first“从属关系”:[]},{“ORCID”:”http://\ORCID.org\/00000-0002-1302-8120“,”authenticated-ORCID“:false,“given”:“Feng”,“family”:“Bao”,“sequence”:“additional”,“affiliation”:[]},{“givent”:“Jiongmin”,“家族”:“Yong”,“serquence”:“additionable”,“filiation“:[]2],“member”:“78”,“reference”:[{“issue”:“3”,“key”:”10.1016\/j.jcp.2023.112238_br0010“,”“doi-asserted-by”:”crossref“,“first page”:“269”,“doi”“:”10.1111\/j.1467-9868.2009.00736.x“,”文章标题“:“粒子马尔可夫链蒙特卡罗方法”,“卷”:“72”,“作者”:“Andrieu”,“年份”:“2010”,“日志标题”:“J.R.Stat.Soc.B”},{“键”:“10.1016\/J.jcp.2023.112238_br0020”,“doi-asserted-by”:“crossref”,“doi”:“101016\/jcp.2019.108871”,“文章标题”:阿奇博尔德“,“年”:“2019”,“journal-title”:“J.Compute.Phys.”},{“key”:“10.1016\/J.jcp.2023.112238_br0030”,“author”:“Archibald”}计算。“},{“key”:“10.1016\/j.jcp.2023.112238_br0050”,“article-title”:“长短期记忆强化学习”,“volume”:”14“,“author”:“Bakker”,“year”:“2001”,“journal-title“:”Adv.Neural Inf.Process.Syst.“},{“key”:《高级神经信息处理系统》,“10.1016”:“合成数据的数据同化作为预测斑秃疾病进展的新策略”,“作者”:“Bao”,“年份”:“2021年”,“期刊标题”:“数学、医学、生物学”},{“问题”:“1”,“关键”:“10.1016\/j.jcp.2023.112238_br0070”,“doi-asserted-by”:“crossref”,“首页”:“413”,“doi”:“101137\/14095546X”,“article-title”:“反向双随机微分方程的一阶格式”,“volume”:“4”,“author”:“Bao”,“year”:“2016”,“journal-title”:“SIAM\/ASA J.Uncertain.Quantification.”},{“issue”:“3”,“key”:”10.1016\/J.jcp.2023.112238_br0080“,”doi-asserted-by“:”crossref“,”first page“:”736“,”doi“:”10.1109\/78.984773“,”article-title“:“从业者粒子滤波方法收敛结果调查”,“volume”:“50”,“author”:“Crisan”,“year”:“2002”,“journal-title”:“IEEE Trans.Signal Process.”},{“key”:”10.1016\/j.jcp.2023.112238_br0090“,”doi-asserted-by“:”crossref“,”doi“:”10.116\/j.actamat.2020.116508“,”article-title“:“通过电子束诱导的单原子动力学探索势能景观”,“卷”:“203”,“作者”:“Dyck”,“年份”:“2021”,“期刊标题”:“材料学报”},{“关键”:“10.1016\/j.jcp.2023.112238_br0100”,“系列标题”:《第六届国际模糊系统会议论文集》,第2卷,“第一页”:“659”,“文章标题”:”模糊q-learning“,”author“:“Glorennec”,“year”:“1997”},{“issue”:“6”,“key”:“10.1016\/j.jcp.2023.112238_br0110”,“doi-asserted-by”:“crossref”,“first page”:《2982》,“doi”:“101137\/17M1123559”,“article-title”:“一种求解随机最优控制问题的有效梯度投影方法”,“volume”::“55”,“author”:“Gong”,“年份”:“2017”,“日记标题”:“SIAM j.Numer.Anal.“},{”问题“:“2”,“key”:“10.1016\/j.jcp.2023.112238_br0120”,“首页”:“107”,“文章标题”:“非线性非高斯贝叶斯状态估计的新方法”,“卷”:“140”,“作者”:“Gordon”,“年份”:“1993”,“期刊标题”:《IEE Proc.F》},{“key“10.1016”,“jcp.2023.112238_br0130”:“2829”,“article-title”:“基于模型加速的连续深度q学习”,“author”:“Gu”,“year”:“2016”},{“key”:“10.1016\/j.jcp.2023.112238_br0140”,“doi-asserted-by”:“crossref”,”doi“:”10.1038\/539485a“,”article-title“:“Fire up the atom forge”,”author“:”Kalinin“,”year“2016”,”journal title“:”Nature“},”{“key”:“10.1016\/j.jcp.2023.112238_br0150”,“article-title”:“随机微分方程的数值解”,“volume”:“vol.23”,“author”:“Kloeden”,“year”:“1992”},{“key”::“Viraj Mehta,Biswajit Paria,Jeff Schneider,Stefano Ermon,Willie Neiswanger,基于模型的强化学习的实验设计观点,2021.”},{“问题”:“4”,“关键”:“10.1016\/jcp.2023.112238_br0170”,“doi-asserted-by”:“crossref”,“首页”:“2049”,“doi”:“101016\/jcp.2011.022”,“article-title”:“隐式过滤器的随机映射实现”,“volume”:“231”,“author”:“Morzfeld”,“year”:“2012”,“journal-title”:“J.Compute.Phys.”},{“key”:《10.1016\/J.jcp.2023.112238_br0180》,“doi-asserted-by”:“crossref”,“doi”:“10.1063\/50006103”,“article-title“从动态轨迹的统计分析中重建有效势”,“volume”:“10”,“作者”:“Yousefzadi Nobakht”,“年份”:“2020”,“期刊标题”:“AIP Adv..”},{“密钥”:“10.1016\/j.jcp.2023.112238_br0190”,“系列标题”:“机器学习论文集1994”,“首页”:“226”,“文章标题”:“增量多步骤q学习”,“作者”:“Peng”,“年份”:“1994”},{“问题”:“4”,“密钥”:“10.1016\/j.jcp.2023.112238_br0200”,doidi断言者“:“crossref”,“first page”:“966”,“DOI”:“10.1137 \/0328054”,“article-title”:“最优控制问题的一般随机最大值原理”,“volume”:”28“,“author”:“Peng”,“year”:“1990”,“journal-title“:”SIAM J.control Optim.“},{“key“:”10.1016 \/J.jcp.2023.112238_br0210“,“series-title”:”强化学习:导论“,”author“:”Sutton“,“year”:“2014”},{“issue”:“3”,“key”:“10.1016\/j.jcp.2023.112238_br0220”,“doi-asserted-by”:“crossref”,“first page”:”58“,”doi“:”10.1145\/203330.203343“,”article-title“:”时间差异学习和td-gammon“,”volume“:”38“,”author“:”Tesauro“,”year“:”1995“,”journal-title”:“Commun.ACM”}“key“:”10.1016\/j.jcp.2023.112238_br0230“,”series-title“:“人工智能年会”,“首页”:“335”,“文章标题”:“基于价值差异的探索:epsilon贪婪与softmax之间的自适应控制”,“作者”:“Tokic”,“年份”:“2011”},{“键”:“10.1016\/j.jcp.2023.112238_br0240”,“doi-asserted-by”:“crossref”,“非结构化”:“Neythen j。Treloar,Nathan Braniff,Brian Ingalls,Chris P.Barnes,生物优化实验设计的深度强化学习,bioRxiv,2022.“,“DOI”:“10.1101\/2022.05.09.491138”},{“key”:“101016\/j.jcp.2023.112238_br0250”,“series-title”:“AAAI人工智能会议论文集”,“article-title”:双q学习的深度强化学习”,“卷”:“第30卷”,“作者”:“Van Hasselt”,“年份”:“2016”},{“期”:“198”,“关键”:“10.1016\/j.jcp.2023.112238_br0260”,“首页”:“1”,“文章标题”:“连续时间和空间中的强化学习:一种随机控制方法”,“卷”:“21”,“作者”:“王”,“年份”:“2020”,“期刊标题”:“j”。机器。学习。研究“},{“问题”:“3”,“关键”:“10.1016\/j.jcp.2023.112238_br0270”,“doi-asserted-by”:“crossref”,“首页”:“279”,“doi”:“101007\/BF00992698”,“文章标题”:“Q-learning”,“volume”:“8”,“author”:“Watkins”,“year”:“1992”,“journal-title”:“Mach.Learn.”},}“关键”cp.2023.112238_br0280“,”文章标题“:”随机控制:哈密顿系统和HJB方程”,“卷”:“第43卷”,“作者”:“勇”,“年”:“1999”},{“问题”:“1”,“关键”:“10.1016\/j.jcp.2023.112238_br0290”,“doi-asserted-by”:“crossref”,“首页”:“459”,“doi”:“101214\/aoap\/1075828058”,“文章标题”:“BSDEs的数值方案”,“体积”:“14”,“作家”:“张”,“年份”:“2004”,“新闻标题“:”Ann.Appl。普罗巴伯。“},{”key“:”10.1016\/j.jcp.2023.112238_br0300“,”series-title“:”第七届高级计算智能与智能信息学国际研讨会(IWACIII 2021)“,”article-title“:“基于好奇和强化学习的迷宫机器人自主导航方法”,“author”:“Zhang”,“year”:“2021”},“issue”:“4”,“key”:“”10.1016\/j.jcp.2023.112238_br0310“,”doi-asserted-by“:”crossref“,”first page“:”A1731“,”doi“:”10.1137\/130941274“,”article-title“:”耦合正倒向随机微分方程的新型高阶多步格式“,”volume“:“36”,”author“:”Zhao“,”year“:”2014“,”journal-title”:“SIAM j.Sci”。计算。“}],”容器标题“:[”计算物理杂志“],”原标题“:[],”语言“:”en“,”链接“:[{”URL“:”https:\/\/api.elsevier.com/content\/article\/PII:S021999123003339?httpAccept=text\/xml“,”内容类型“:”text\/xml“,”内容版本“:”vor“,”预期应用程序“:”文本挖掘“},{”URL“:”https:\/\/api.elsevier.com/content\/article\/PII:S0021999123003339?httpAccept=text\/plain“,”content-type“:”text\/prain“,“content-version”:“vor”,“intended-application”:“text-mining”}],“deposed”:{“date-parts”:[[2023,6,9]],“date-time”:“2023-06-09T23:34:42Z”,“timestamp”:1686353682000},“score”:1,“resource”:{primary“:{”URL“https:\\/linkinghub.elsevier.com\/retrieve\/pii\/S0021 999123003339“}},”副标题“:[],”shorttitle“:[],”issued“:{”date-parts“:[[2023,9]]},”references-count“:31,”alternative-id“:[”S0021999123003339“],”URL“:”http://\/dx.doi.org\/10.1016\/j.jcp.2023.112238“,”relationship“:{},“ISSN”:[“0021-9991”],“ISSN-type”:[{“value”:“0021-999”,”type“:”print“}],”subject“:[],“published”:{“date-parts”:[[2023,9]]},“assertion”:[{“value”:“Elsevier”,“name”:“publisher“,”label“:”本文由“},{“value”:“参数化环境下强化学习的随机最大值原理方法”,“name”:“articletite”,“label”:“article Title”},“value“:”Journal of Computational Physics“,”name“:”journaltitle“,”label“:“Journal Titlehttps:\/\/doi.org\/10.1016\/j.jcp.2023.112238“,”name“:”articlelink“,”label“:”CrossRef doi link to publisher maintained version“},”{“value”:“article”,“name”:“content_type”,“label”:“content-type”},{“value”:“\u00a9 2023 Elsevier Inc.保留所有权利