{“状态”:“确定”,“消息类型”:“工作”,“信息版本”:“1.0.0”,“邮件”:{“索引”:{“日期-部件”:[[2023,7,25]],“日期-时间”:“2023-07-25T04:20:39Z”,“时间戳”:1690258839595},“引用-计数”:51,“发布者”:“IEEE”,“许可证”:[{“开始”:“日期-零件”:[2023,5,29]],”日期-时间“:”2023-05-29T00:00:00Z“,”timestamp“:1685318400000},”content-version“:“stm-asf”,“delay-in-days”:0,“URL”:“https:\\/doi.org\/10.15223\/policy-029”},{“start”:{“date-parts”:[[2023,5,29]],“date-time”:“2023-05-29T00:00:00Z”,“timestamp”:1685318400000},“content-version”:“stm-asf”}],“内容域”:{“域”:[],“交叉标记限制”:false},“short-container-title”:[],“published-print”:{“date-parts”:[[2023,5,29]]},“DOI”:“10.1109\/icra48891.2023.10161186”,“type”:“proceedings-article”,“created”:{“date-ports”:[2023,7,4]],“date-time”:“2023-07-04T17:20:56Z”,“timestamp”:1688491256000},”source:“Crossref”,”is-referenced-by-count“:0,”title“:[“通过重尾策略优化处理连续控制机器人中的稀疏奖励”],“prefix”:“10.1109”,“author”:[{“given”:“Souradip”,“family”:“Chakraborty”,“sequence”:“first”,“affiliation”:[}“name”:“University of Maryland,College Park,MD,USA”}]}:“马里兰大学,马里兰州大学帕克分校,美国”}]},{“given”:“Kasun”,“family”:“Weerakoon”,“sequence”:“additional”,“affiliation”:[{“name”:“马里兰州大学,马里大学帕克学院,美国”{]}:“Koppel”,“sequence”:“additional”,“affiliation”:[{“name”:“JP Morgan AI Research,New York,NY,USA”}]},{“given”:“Pratap”,“family”:“Tokekar”,“serquence”:“addition”,“feliation”:[{(名称):“University of Maryland,College Park,MD,USA“}]},{”given“:”Dinesh“,”family“:”Manocha“,”sequence“:”additional“,“afliation”:[{”name“:“马里兰大学,马里兰州大学帕克分校,美国马里兰州”}]}],“成员”:“263”,“参考”:[{“key”:“ref13”,“doi-asserted-by”:“publisher”,“doi”:“10.1109\/ICRA.2018.8463162”},{“密钥”:“ref12”,“doi-asserte-by”:“publisher”,“DI:”10.1016\/B978-1-55860-335-6.50030-1“}”,{”key“:”ref15“,“首页”:“2”,“article-title“:反向强化学习算法”,“volume”:“1”,“author”:“ng”,“year”:“2000”,“journal-title”:“ICML”},{“key”:”ref14“,”article-title“:”利用演示进行机器人问题的深度强化学习,奖励稀少“,”author“:”vecerik“,”year“:”2017“,”journal-title“:”ArXiv Preprint“},”{“key”:“ref11”,“article-title”:“关于移动机器人导航的奖励塑造:基于强化学习和满贯的方法”,“author”:“botteghi”,“year”:“2020”,“journal-title”:“ArXiv Preprint”},{“key”:“ref10”,“doi-asserted-by”:“publisher”,“doi”:“10.1162\/neco_A_01387”}:“使用单个演示进行近端策略优化的引导性探索”,“author”:“libardi”,“year”:“0”,“journal-title”:“International Conference on Machine Learning”},{“key”:“ref16”,“first-page”:“1433”,“article-title(文章标题):“Maximum熵反向强化学习”,“volume”:,{“key”:“ref19”,“author”:“chakraborty”,“year”:“2022”,“期刊标题”:“基于模型的强化学习的核化stein差异的后验核集构建”},{“key”:“ref18”,“doi断言”:“publisher”,“doi”:“10.1109\/CVPRW.2017.70”},{“key”:“ref51”,“year”:“2022”,“期刊标题”:“关于“通过重尾策略优化处理连续控制机器人中的稀疏奖励”的技术报告”},{“key”:“ref50”,“article-title”:“Openai健身房”,“author”:“brockman”,“year”:“2016”,“journal-title“:“ArXiv预打印”},“journal-title”:“ArXiv Preprint”},{“key”:“ref45”,“doi-asserted-by”:“publisher”,“doi”:“10.1613\/jair.806”}:“ddpg的问题:理解具有稀疏回报的确定性环境中的失败”,“作者”:“matheron”,“年份”:“2019年”,“日志标题”:“ArXiv预打印”},{“key”:“ref42”,“doi-asserted-by”:“publisher”,“doi”:“10.1137\/19M1288012”},{“key”:“ref44”,“first-page”:“4026”,“article-title”:“随机方差减少的政策梯度”,“author”:“papini”,“year”:“0”,“journal-title“:”机器学习国际会议“},{“key”:”ref43“,“article-title:”政策梯度方法的随机递归动量“,”author“:”yuan“,”year“2020”,“journal-ttitle”:”ArXiv Preprint“}”,{“key”:“ref49”,“首页”:“1889”,“article-title”:“托管区域政策优化”,“author”:“schulman”,“year”:“0”,“journal-title“:“机器学习国际会议”},{“key”:”ref8“,“first-page”:“2469:“机器学习国际会议”},{“key”:“ref7”,“doi-asserted-by”:“publisher”,“doi”:“10.1109\/IROS45743.2020.9341714”},“doi断言者”:“publisher”,“doi”:“10.1109\/ICRA46639.2022.9812238”},{“key”:“ref3”,“doi断言者”:“publisher”,“doi”:“10.1109\/IROS45743.2020.9341540”},{“key”:“ref6”,“doi断言者”:“publisher”,“doi”:“10.15607\/RSS.209.XV.073”},{“key”:“ref5”,“doi断言者”:“publisher”,“doi”:“10.1109\/RA.2020.2966414“},{“key”:“ref40”,“author”:“chou”,“年份”:“2017”,“期刊标题”:“持续控制强化学习的beta策略”},{“key”:“ref35”,“doi断言”:“publisher”,“doi”:“10.1145\/30544912”},{“key”:“ref34”,“文章标题”:“探索的参数空间噪声”,“author”:“plappert”,“year”:“2017”,“期刊标题”:“ArXiv预印本”},{“key”:“ref37”,“文章标题”:“从演示中学习”,“卷”:“9”,“作者”:“schaal”,“年份”:“1996”,“新闻标题”:“神经信息处理系统的进展”},“doi-asserted-by”:“publisher”,“doi”:“10.1137\/07071011”},{“key”:“ref30”,“doo-asserted-by”:”publisher“,”doi“:”10.1126\/science.279.5347.39“},”{“key”:“ref33”,“article-title”:“基于动量的非凸sgd方差缩减”,“volume”:”32“author”:“cutkosky”,“year”:“2019”,“journal-title“:”“Advances in neural information processing systems”},{“键”:“ref32”,“article-title”:“Htron:通过重尾自适应增强算法实现具有稀疏奖励的高效户外导航”,“author”:“weerakoon”,“year”:“0”,“journal-title“:“第六届机器人学习年会”},{“key”:“ref2”,“doi-asserted-by”:“publisher”,“doi”:“10.1109\/IROS.2018.8593871”}:“使用模拟运动演示进行机器人操作的强化学习”,“author”:“kilinc”,“year”:“2021”,“journal-title”:“Machine learning”},{“key”:“ref39”,“article-title“:“通过深度强化学习和演示学习复杂的灵巧操作”,“author”:“rajeswaran”,“year”:“2017”,“journal-ttitle”:“ArXiv Preprint”},{“key”:“ref38”,“article-title”:“来自示范和好奇心的政策梯度”,“author”:“chen”,“year”:“2020”,“journal-title“:”ArXiv Preprint“},}”key“:”ref24“,”article-title“关于连续控制下重尾政策搜索的样本复杂性和亚稳态”,“author”:“bedi”,“年份”:“2021”,“日记标题”:“ArXiv预印本”},{“key”:“ref23”,“首页”:“6820”,“文章标题”:“关于softmax政策梯度方法的全球收敛速度”,“作者”:“mei”,“年份”:“0”,“期刊标题”:“国际机器学习会议”},{“key”:“ref26”,“doi asserted by”:“publisher”,“doi”:“10.1512\\iumj.1981.30.30055”},{“key”:“ref25”,“首页”:“1716”,“article-title”:“关于连续行动空间中政策镜像上升的隐藏偏见”,“volume”:“162”,“author”:“bedi”,”year“:“0”,“journal-title“:“第39届国际机器学习会议论文集-机器学习研究论文集”},{“key”:”ref20“,”article-title“:“Vime:变异信息最大化探索”,“volume”:“29”,“author”:“houthoft”,“year”:“2016”,“journal-title”:“Advances in neural information processing systems”},{“key”:”ref22“,”doi-asserted-by“:”publisher“,”doi“:”10.1609 \/aaai.v32i1.11757“},“key“:”ref21“,”author“:”chakraborty“,”year“:”2023“,”journal-title“:“Stein信息导向探索基于模型的强化学习”},{“key”:“ref28”,“volume”:”2“,“author”:“taleb”,“year”:“2007”,“journal-title”:“The Black Swan The Impact of The Highly Improbable”}、{“key”::“ref27”,“volume”:“1”、“author:”mandelbrot“,“year:”1982“,”journal-title“:”The Fractal Geometry of Nature“},}”key:“ref29”,“doi-asserted-by”:“publisher”,“doi”:“10.1257\/mac.1.1.58”}],“event”:{“name”:“2023 IEEE机器人与自动化国际会议(ICRA)”,“location”:“London,United Kingdom”,“start”:{-“date-parts”:[[2023,5,29]]},“end”:{--“date-parts”:[2023,6,2]}},“original-title”:[],“link”:[{“URL”:“http://\/xplorestaging.ieee.org\/ielx7\/10160211\/10160212\/10161186.pdf?arnumber=10161186”,“content-type”:“unspecified”,“content-version”:“vor”,“intended-application”:“similarity-checking”}],“deposed”:{“date-parts”:[2023,7,24]],“date-time”:”2023-07-24T17:37:24Z“时间戳”:1690220244000},“分数”:1,“资源”用法:{“primary”:{“URL”:“https:\/\/ieeexplore.iee.org\/document\/10161186\/”}},“subtitle”:[],“shorttitle”:[],“issued”:{日期部分:[[2023,5,29]]},”references-count“:51,”URL“:”http://\/dx.doi.org\/101109\/icra48891.2023.10161186“,”关系“:{}”,“subject”:[]],“published”:rts“:[[2023,5,29]]}}}