{“状态”:“确定”,“消息类型”:“工作”,“信息版本”:“1.0.0”,“邮件”:{“索引”:{-“日期部分”:[[2022,7,3]],“日期时间”:“2022-07-03T00:48:21Z”,“时间戳”:1656809301285},“引用计数”:29,“发布者”:“IEEE”,“内容域”:{“域”:[],“交叉标记限制”:false},”短容器时间“:[]”,“发布发布者”:{“日期部分”:[[2014,5]]},“DOI”:“10.1109\/icra.2014.6907631”,“type”:“proceedings-article”,“created”:{“date-parts”:[[2014,9,30]],“date-time”:“2014-09-30T16:32:36Z”,“timestamp”:1412094756000},“source”:”Crossref“,”is-referenced-by-count“:8,”title“:[”根据线性可解MDP,结合学习的控制器以实现新目标“],”前缀“:”10.1109“,“author”:[{“给定”:“Eiji”,“family”:“Uchibe”,“sequence”:“first”,“affiliation”:[]}:“国际自动控制联合会世界大会议事录”},{“key”:“ref11”,“首页”:“1856”,“文章标题”:“最优控制律的组成”,“author”:“todorov”,“year”:“2009”,“journal-title”:“Advances in Neural Information Processing Systems 22”}:“10.1145\\1531326.1531388”},{“key”:“ref13”,“doi断言者”:“publisher”,“doi”:“10.3389\/fnbot.2013.0007”},{“key”:“ref14”,“doi断言者”:“publisher”,“doi”:“10.1023\/A:107936530646”},{“key”:“ref15”,“首页”:“1107”,“文章标题”:“最小二乘法政策迭代”,“卷”:“4”,“作者”:“lagoudakis”,“年份”:“2003”,“期刊标题”:“Journal of Machine Learning Research”},{“key”:“ref16”,“doi-asserted-by”:“publisher”,“doi”:“10.1162\/089976602753712972”}“:”参考19“,“doi-asserted-by”:“publisher”,“doi”:“10.1007\/s10339-011-0404-1”},{“key”:“ref28”,“doo-asserted-by”:”publisher“,”doi“:”10.1109\/DevLrn.2013.6652533“},}“key”:”ref4“,”doi-assert-by“:”publister“,”doi“:“10.1016\/j.neuron.2011.02.027”}“,{”key“:”ref27“,”article-title“:“使用路径积分强化学习对周期性运动进行阶段相关轨迹优化”,“author”:“sugimoto”,“year”:“2011”,“journal-title”:“Japanese Neural Network Society Annual Conference”},{“key”:“ref3”,“doi-asserted-by”:“publisher”,“doi”:“10.1016\/jmp.2008.12.005”},“DOI”:“10.1073\/pnas.0710743106”},{“key”:“ref29”,“DOI-asserted-by”:“publisher”,“DOI:”10.1016\/j.orl.2011.06.014 Markov决策问题”,“作者”:“todorov”,“年份”:“2007年”,“新闻标题”:“Advances in Neural Information Processing Systems 19”},{“key”:“ref7”,“doi-asserted-by”:“publisher”,“doi”:“10.1073\/pnas.0905423106”}:“ref1”,“doi-asserted-by”:“publisher”,“DOI”:“10.7551\/mitpress\/9780262042383.003.0012”},{“key”:“ref20”,“DOI-asserted-by”:“publisher”,“DOI”:“10.1016\/j.robot.2011.07.006”}、{“密钥”:“ref22”,“DOI-assertd-by”:“publisher”,“DI:”10.1109\/CD.2012.6426381 10.1109\/robot.2008.4543306“},{“key”:“ref24”,“DOI-asserted-by”:“publisher”,“DOI”:“10.1088\/1742-5468\/2005/11/P11011”},{“key”:“ref23”,“doi断言者”:“publisher”,“doi”:“10.1103\/PhysRevLett.95.200201”},{“key”:“ref26”,“文章标题”:“协方差矩阵自适应的路径综合政策改进”,“作者”:“stulp”,“年份”:“2012”,“期刊标题”:“第十届欧洲强化学习研讨会(EWRL 2012)论文集”},{“key”:“ref25”,“first-page”:“3137”,“article-title”:“强化学习的广义路径积分控制方法”,“volume”:”11“,“author”:“theodorou”,“year”:“2010”,“journal title”:《机器学习研究杂志》}],“event”:{“name”:“2014 IEEE机器人与自动化国际会议(ICRA)”,“location”:“中国香港”,“start”:{“date-parts”:[[2014,5,31]]},“end”:{“date-parts”:[[2014,6,7]]}},”container-title“:[“2014 IEEE机器人与自动化国际会议(ICRA)”],“original-title”:[],“link”:[{“URL”:“http://\xplorestaging.IEEE.org\/ielx7\/6895053\/6906581\/06907631.pdf?arnumber=6907631”,“content-type”:“unspecified”,“content-version”:“vor”,“意向应用程序”:“相似性检查”}],“存放”:{“日期部分”:[[2017,3,23]],“日期时间”:“2017-03-23T17:26:04Z”,“时间戳”:1490289964000},“分数”:1,“资源”:{primary:{”URL“:”http://ieeexplore.iee.org\/document\/6907631\/“}”,“副标题”:[],“短标题”:[],“已发布”:{“日期部分“:[[2014,5]]},“references-count”:29,“URL”:“http://\/dx.doi.org\/10.109\/icra.2014.6907631”,“关系”:{},“主题”:[],“发布”:{“日期部分”:[[2014,5]]}}