{“状态”：“确定”，“消息类型”：“工作”，“信息版本”：“1.0.0”，“邮件”：{“索引”：{“日期-部件”：[[2023,7,25]]，“日期-时间”：“2023-07-25T04:20:39Z”，“时间戳”：1690258839595}，“引用-计数”：51，“发布者”：“IEEE”，“许可证”：[{“开始”：“日期-零件”：[2023,5,29]]，”日期-时间“：”2023-05-29T00:00:00Z“，”timestamp“：1685318400000}，”content-version“：“stm-asf”，“delay-in-days”：0，“URL”：“https:\\/doi.org\/10.15223\/policy-029”}，{“start”：{“date-parts”：[[2023,5,29]]，“date-time”：“2023-05-29T00:00:00Z”，“timestamp”：1685318400000}，“content-version”：“stm-asf”}]，“内容域”：{“域”：[]，“交叉标记限制”：false}，“short-container-title”：[]，“published-print”：{“date-parts”：[[2023,5,29]]}，“DOI”：“10.1109\/icra48891.2023.10161186”，“type”：“proceedings-article”，“created”：{“date-ports”：[2023,7,4]]，“date-time”：“2023-07-04T17:20:56Z”，“timestamp”：1688491256000}，”source：“Crossref”，”is-referenced-by-count“：0，”title“：[“通过重尾策略优化处理连续控制机器人中的稀疏奖励”]，“prefix”：“10.1109”，“author”：[{“given”：“Souradip”，“family”：“Chakraborty”，“sequence”：“first”，“affiliation”：[}“name”：“University of Maryland，College Park，MD，USA”}]}：“马里兰大学，马里兰州大学帕克分校，美国”}]}，{“given”：“Kasun”，“family”：“Weerakoon”，“sequence”：“additional”，“affiliation”：[{“name”：“马里兰州大学，马里大学帕克学院，美国”{]}：“Koppel”，“sequence”：“additional”，“affiliation”：[{“name”：“JP Morgan AI Research，New York，NY，USA”}]}，{“given”：“Pratap”，“family”：“Tokekar”，“serquence”:“addition”，“feliation”:[{（名称）：“University of Maryland，College Park，MD，USA“}]}，{”given“：”Dinesh“，”family“：”Manocha“，”sequence“：”additional“，“afliation”:[{”name“：“马里兰大学，马里兰州大学帕克分校，美国马里兰州”}]}]，“成员”：“263”，“参考”：[{“key”：“ref13”，“doi-asserted-by”：“publisher”，“doi”：“10.1109\/ICRA.2018.8463162”}，{“密钥”：“ref12”，“doi-asserte-by”：“publisher”，“DI:”10.1016\/B978-1-55860-335-6.50030-1“}”，{”key“：”ref15“，“首页”：“2”，“article-title“：反向强化学习算法”，“volume”：“1”，“author”：“ng”，“year”：“2000”，“journal-title”：“ICML”}，{“key”：”ref14“，”article-title“：”利用演示进行机器人问题的深度强化学习，奖励稀少“，”author“：”vecerik“，”year“：”2017“，”journal-title“:”ArXiv Preprint“}，”{“key”：“ref11”，“article-title”：“关于移动机器人导航的奖励塑造：基于强化学习和满贯的方法”，“author”：“botteghi”，“year”：“2020”，“journal-title”：“ArXiv Preprint”}，{“key”：“ref10”，“doi-asserted-by”：“publisher”，“doi”：“10.1162\/neco_A_01387”}：“使用单个演示进行近端策略优化的引导性探索”，“author”：“libardi”，“year”：“0”，“journal-title”：“International Conference on Machine Learning”}，{“key”：“ref16”，“first-page”：“1433”，“article-title（文章标题）：“Maximum熵反向强化学习”，“volume”：，｛“key”：“ref19”，“author”：“chakraborty”，“year”：“2022”，“期刊标题”：“基于模型的强化学习的核化stein差异的后验核集构建”｝，｛“key”：“ref18”，“doi断言”：“publisher”，“doi”：“10.1109\/CVPRW.2017.70”｝，｛“key”：“ref51”，“year”：“2022”，“期刊标题”：“关于“通过重尾策略优化处理连续控制机器人中的稀疏奖励”的技术报告”}，{“key”：“ref50”，“article-title”：“Openai健身房”，“author”：“brockman”，“year”：“2016”，“journal-title“：“ArXiv预打印”}，“journal-title”：“ArXiv Preprint”}，{“key”：“ref45”，“doi-asserted-by”：“publisher”，“doi”：“10.1613\/jair.806”}：“ddpg的问题：理解具有稀疏回报的确定性环境中的失败”，“作者”：“matheron”，“年份”：“2019年”，“日志标题”：“ArXiv预打印”}，{“key”：“ref42”，“doi-asserted-by”：“publisher”，“doi”：“10.1137\/19M1288012”}，{“key”：“ref44”，“first-page”：“4026”，“article-title”：“随机方差减少的政策梯度”，“author”：“papini”，“year”：“0”，“journal-title“：”机器学习国际会议“}，{“key”：”ref43“，“article-title:”政策梯度方法的随机递归动量“，”author“：”yuan“，”year“2020”，“journal-ttitle”：”ArXiv Preprint“}”，{“key”：“ref49”，“首页”：“1889”，“article-title”：“托管区域政策优化”，“author”：“schulman”，“year”：“0”，“journal-title“：“机器学习国际会议”}，{“key”：”ref8“，“first-page”：“2469：“机器学习国际会议”}，{“key”：“ref7”，“doi-asserted-by”：“publisher”，“doi”：“10.1109\/IROS45743.2020.9341714”}，“doi断言者”：“publisher”，“doi”：“10.1109\/ICRA46639.2022.9812238”}，｛“key”：“ref3”，“doi断言者”：“publisher”，“doi”：“10.1109\/IROS45743.2020.9341540”}，｛“key”：“ref6”，“doi断言者”：“publisher”，“doi”：“10.15607\/RSS.209.XV.073”}，｛“key”：“ref5”，“doi断言者”：“publisher”，“doi”：“10.1109\/RA.2020.2966414“}，{“key”：“ref40”，“author”：“chou”，“年份”：“2017”，“期刊标题”：“持续控制强化学习的beta策略”}，｛“key”：“ref35”，“doi断言”：“publisher”，“doi”：“10.1145\/30544912”｝，｛“key”：“ref34”，“文章标题”：“探索的参数空间噪声”，“author”：“plappert”，“year”：“2017”，“期刊标题”：“ArXiv预印本”}，｛“key”：“ref37”，“文章标题”：“从演示中学习”，“卷”：“9”，“作者”：“schaal”，“年份”：“1996”，“新闻标题”：“神经信息处理系统的进展”}，“doi-asserted-by”：“publisher”，“doi”：“10.1137\/07071011”}，{“key”：“ref30”，“doo-asserted-by”：”publisher“，”doi“：”10.1126\/science.279.5347.39“}，”{“key”：“ref33”，“article-title”：“基于动量的非凸sgd方差缩减”，“volume”：”32“author”：“cutkosky”，“year”：“2019”，“journal-title“：”“Advances in neural information processing systems”}，{“键”：“ref32”，“article-title”：“Htron：通过重尾自适应增强算法实现具有稀疏奖励的高效户外导航”，“author”：“weerakoon”，“year”：“0”，“journal-title“：“第六届机器人学习年会”}，{“key”：“ref2”，“doi-asserted-by”：“publisher”，“doi”：“10.1109\/IROS.2018.8593871”}：“使用模拟运动演示进行机器人操作的强化学习”，“author”：“kilinc”，“year”：“2021”，“journal-title”：“Machine learning”}，{“key”：“ref39”，“article-title“：“通过深度强化学习和演示学习复杂的灵巧操作”，“author”:“rajeswaran”，“year”:“2017”，“journal-ttitle”：“ArXiv Preprint”}，{“key”：“ref38”，“article-title”：“来自示范和好奇心的政策梯度”，“author”：“chen”，“year”：“2020”，“journal-title“：”ArXiv Preprint“}，}”key“：”ref24“，”article-title“关于连续控制下重尾政策搜索的样本复杂性和亚稳态”，“author”:“bedi”，“年份”：“2021”，“日记标题”：“ArXiv预印本”｝，｛“key”：“ref23”，“首页”：“6820”，“文章标题”：“关于softmax政策梯度方法的全球收敛速度”，“作者”：“mei”，“年份”：“0”，“期刊标题”：“国际机器学习会议”｝，｛“key”：“ref26”，“doi asserted by”：“publisher”，“doi”：“10.1512\\iumj.1981.30.30055”｝，｛“key”：“ref25”，“首页”：“1716”，“article-title”：“关于连续行动空间中政策镜像上升的隐藏偏见”，“volume”：“162”，“author”：“bedi”，”year“：“0”，“journal-title“：“第39届国际机器学习会议论文集-机器学习研究论文集”}，{“key”：”ref20“，”article-title“：“Vime：变异信息最大化探索”，“volume”：“29”，“author”：“houthoft”，“year”：“2016”，“journal-title”：“Advances in neural information processing systems”}，{“key”：”ref22“，”doi-asserted-by“：”publisher“，”doi“：”10.1609 \/aaai.v32i1.11757“}，“key“：”ref21“，”author“：”chakraborty“，”year“：”2023“，”journal-title“：“Stein信息导向探索基于模型的强化学习”}，{“key”：“ref28”，“volume”：”2“，“author”：“taleb”，“year”：“2007”，“journal-title”：“The Black Swan The Impact of The Highly Improbable”}、{“key”：：“ref27”，“volume”:“1”、“author:”mandelbrot“，“year:”1982“，”journal-title“：”The Fractal Geometry of Nature“}，}”key：“ref29”，“doi-asserted-by”：“publisher”，“doi”：“10.1257\/mac.1.1.58”}]，“event”：{“name”：“2023 IEEE机器人与自动化国际会议（ICRA）”，“location”：“London，United Kingdom”，“start”：{-“date-parts”：[[2023,5,29]]}，“end”：{--“date-parts”：[2023,6,2]}}，“original-title”：[]，“link”：[{“URL”：“http://\/xplorestaging.ieee.org\/ielx7\/10160211\/10160212\/10161186.pdf？arnumber=10161186”，“content-type”：“unspecified”，“content-version”：“vor”，“intended-application”：“similarity-checking”}]，“deposed”：{“date-parts”：[2023,7,24]]，“date-time”：”2023-07-24T17:37:24Z“时间戳”：1690220244000}，“分数”：1，“资源”用法：{“primary”:{“URL”：“https:\/\/ieeexplore.iee.org\/document\/10161186\/”}}，“subtitle”：[]，“shorttitle”：[]，“issued”：{日期部分：[[2023,5,29]]}，”references-count“：51，”URL“：”http://\/dx.doi.org\/101109\/icra48891.2023.10161186“，”关系“：{}”，“subject”：[]]，“published”：rts“：[[2023,5,29]]}}}