{
  "status": "ok",
  "message-type": "work",
  "message-version": "1.0.0",
  "message": {
    "indexed": {
      "date-parts": [[2022, 4, 2]],
      "date-time": "2022-04-02T19:50:57Z",
      "timestamp": 1648929057494
    },
    "reference-count": 26,
    "publisher": "IEEE",
    "content-domain": { "domain": [], "crossmark-restriction": false },
    "short-container-title": [],
    "published-print": { "date-parts": [[2017, 9]] },
    "DOI": "10.1109/iros.2017.8205960",
    "type": "proceedings-article",
    "created": {
      "date-parts": [[2017, 12, 14]],
      "date-time": "2017-12-14T22:12:59Z",
      "timestamp": 1513289579000
    },
    "source": "Crossref",
    "is-referenced-by-count": 8,
    "title": ["Deep dynamic policy programming for robot control with raw images"],
    "prefix": "10.1109",
    "author": [
      { "given": "Yoshihisa", "family": "Tsurumine", "sequence": "first", "affiliation": [] },
      { "given": "Yunduan", "family": "Cui", "sequence": "additional", "affiliation": [] },
      { "given": "Eiji", "family": "Uchibe", "sequence": "additional", "affiliation": [] },
      { "given": "Takamitsu", "family": "Matsubara", "sequence": "additional", "affiliation": [] }
    ],
    "reference": [
      { "key": "ref10", "first-page": "2094", "article-title": "Deep reinforcement learning with double Q-learning", "author": "van hasselt", "year": "2016", "journal-title": "Association for the Advancement of Artificial Intelligence (AAAI)" },
      { "key": "ref11", "first-page": "1995", "article-title": "Dueling network architectures for deep reinforcement learning", "author": "wang", "year": "2016", "journal-title": "International Conference on Machine Learning (ICML) ICML'16" },
      { "key": "ref12", "first-page": "119", "article-title": "Dynamic policy programming with function approximation", "author": "azar", "year": "2011", "journal-title": "International Conference on Artificial Intelligence and Statistics (AISTATS)" },
      { "key": "ref13", "article-title": "Dynamic policy programming", "volume": "13", "author": "azar", "year": "2012", "journal-title": "The Journal of Machine Learning Research" },
      { "key": "ref14", "doi-asserted-by": "publisher", "DOI": "10.1007/BF00992698" },
      { "key": "ref15", "first-page": "1038", "article-title": "Generalization in reinforcement learning: Successful examples using sparse coarse coding", "author": "sutton", "year": "1996", "journal-title": "Advances in Neural Information Processing Systems (NIPS)" },
      { "key": "ref16", "first-page": "1107", "article-title": "Least-squares policy iteration", "volume": "4", "author": "lagoudakis", "year": "2003", "journal-title": "Journal of Machine Learning Research" },
      { "key": "ref17", "first-page": "1369", "article-title": "Linearly solvable Markov decision problems", "author": "todorov", "year": "2006", "journal-title": "Advances in Neural Information Processing Systems (NIPS)" },
      { "key": "ref18", "doi-asserted-by": "publisher", "DOI": "10.1080/01691864.2016.1274680" },
      { "key": "ref19", "doi-asserted-by": "publisher", "DOI": "10.1109/HUMANOIDS.2016.7803345" },
      { "key": "ref4", "first-page": "1097", "article-title": "Imagenet classification with deep convolutional neural networks", "author": "krizhevsky", "year": "2012", "journal-title": "Advances in Neural Information Processing Systems (NIPS)" },
      { "key": "ref5", "doi-asserted-by": "publisher", "DOI": "10.1109/MRA.2010.936957" },
      { "key": "ref6", "doi-asserted-by": "publisher", "DOI": "10.1109/CVPR.2015.7298594" },
      { "key": "ref8", "doi-asserted-by": "publisher", "DOI": "10.1109/ICASSP.2013.6638947" },
      { "key": "ref7", "doi-asserted-by": "publisher", "DOI": "10.1109/TASL.2011.2134090" },
      { "key": "ref2", "doi-asserted-by": "publisher", "DOI": "10.1177/0278364913495721" },
      { "key": "ref9", "doi-asserted-by": "publisher", "DOI": "10.1038/nature14236" },
      { "key": "ref1", "author": "sutton", "year": "1998", "journal-title": "Introduction to Reinforcement Learning" },
      { "key": "ref20", "author": "abadi", "year": "2016", "journal-title": "Tensorflow: Large-scale machine learning on heterogeneous distributed systems" },
      { "key": "ref22", "author": "o'donoghue", "year": "2016", "journal-title": "PGQ: Combining policy gradient and Q-learning" },
      { "key": "ref21", "author": "chollet", "year": "2015", "journal-title": "Keras" },
      { "key": "ref24", "author": "lillicrap", "year": "2015", "journal-title": "Continuous control with deep reinforcement learning" },
      { "key": "ref23", "first-page": "1928", "article-title": "Asynchronous methods for deep reinforcement learning", "author": "mnih", "year": "2016", "journal-title": "International Conference on Machine Learning (ICML)" },
      { "key": "ref26", "article-title": "Q-prop: Sample-efficient policy gradient with an off-policy critic", "author": "gu", "year": "2017", "journal-title": "International Conference on Learning Representations (ICLR)" },
      { "key": "ref25", "article-title": "Sample efficient actor-critic with experience replay", "author": "wang", "year": "2017", "journal-title": "International Conference on Learning Representations (ICLR)" }
    ],
    "event": {
      "name": "2017 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)",
      "location": "Vancouver, BC",
      "start": { "date-parts": [[2017, 9, 24]] },
      "end": { "date-parts": [[2017, 9, 28]] }
    },
    "container-title": ["2017 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)"],
    "original-title": [],
    "link": [
      {
        "URL": "http://xplorestaging.ieee.org/ielx7/8119304/8202121/08205960.pdf?arnumber=8205960",
        "content-type": "unspecified",
        "content-version": "vor",
        "intended-application": "similarity-checking"
      }
    ],
    "deposited": {
      "date-parts": [[2018, 2, 28]],
      "date-time": "2018-02-28T20:40:05Z",
      "timestamp": 1519850405000
    },
    "score": 1,
    "resource": { "primary": { "URL": "http://ieeexplore.ieee.org/document/8205960/" } },
    "subtitle": [],
    "short-title": [],
    "issued": { "date-parts": [[2017, 9]] },
    "references-count": 26,
    "URL": "http://dx.doi.org/10.1109/iros.2017.8205960",
    "relation": {},
    "subject": [],
    "published": { "date-parts": [[2017, 9]] }
  }
}
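For reference, a record in this shape can be retrieved from the public Crossref REST API at https://api.crossref.org/works/{DOI}. Below is a minimal Python sketch, assuming the third-party requests package is installed; the specific field accesses mirror the record above and are otherwise not guaranteed for every Crossref work type.

    # Minimal sketch: fetch and inspect this Crossref work record.
    # Assumes `requests` is installed; api.crossref.org is the public endpoint.
    import requests

    DOI = "10.1109/iros.2017.8205960"
    resp = requests.get(f"https://api.crossref.org/works/{DOI}", timeout=10)
    resp.raise_for_status()
    work = resp.json()["message"]  # the "message" envelope shown above

    print(work["title"][0])                          # paper title
    print(work["DOI"], work["type"])                 # DOI and work type
    print([a["family"] for a in work["author"]])     # author family names
    print(work["is-referenced-by-count"], "citations at indexing time")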