{“状态”:“确定”,“消息类型”:“工作”,“信息版本”:“1.0.0”,“邮件”:{“索引”:{“日期部分”:[2022,4,1]],“日期时间”:“2022-04-01T22:09:25Z”,“时间戳”:1648850965562},“参考计数”:45,“出版商”:“电气与电子工程师学会(IEEE)”,“问题”:“2”,“资助者”:[{“DOI”:“10.13039\/100014895”,“名称”:“开放慈善项目AI奖学金”,“doi-asserted-by”:“publisher”},{“doi”:“10.13039\/501100000923”,“name”:“澳大利亚研究委员会发现项目”,“doi-asserted-by”:”publisher“,”award“:[”DP150104590“]}],“content-domain”:{“domain”:[],“crossmark-restriction”:false},“short-container-title”:[”IEEE J.Sel.Areas Inf.Theory“],“published-print”:{“date-parts”:[[2021,6]]},“DOI”:“10.1109\/jsait.2021.3079722”,“type”:“journal-article”,“created”:{“date-ports”:[2021,5,14]],“date-time”:“2021-05-14T19:45:58Z”,“timestamp”:1621021558000},”page“:”665-677“source”:”Crossref“,”is-referenced-by-count“:0,”title“:[“好奇心杀死了猫或使猫丧失了能力和渐近最优代理”],“前缀”:“10.1109”,“卷”:“2”,“作者”:[{“ORCID”:“http://\/ORCID.org\/0000-0003-1749-875X”,“authenticated-ORCID”:false,“给定”:“Michael K.”,“family”:“Cohen”,“sequence”:“first”,“affiliation”:[]},{“给定”:“Elliot”,“家族”:“Catt”,“序列”:“additional”,“从属关系”:[]},{“ORCID”:“http://\/ORCID.org\/0000-0002-3263-4097”,“authenticated-ORCID”:false,“给定”:“马库斯”,“家族”:“Hutter”,“序列”:“附加”,“从属”:[]}],“成员”:“263”,“引用”:[{“key”:”ref39“,”doi-asserted-by“:”publisher“,”doi“:”10.1016\/j.tcs.2013.09.022“}”,{”key“:“ref38”,“doi-asserted-by”:“publisher”,“doi”:“10.1609\/aaaai.v34i03.5628”},{“key”:“ref33”,“author”:“kosoy”,“year”:“2019”,“journal-title”:“委托强化学习学习在稍微有帮助的情况下避免陷阱”},“期刊标题”:“Proc Adv Neural Inf Process Syst”},{“key”:“ref31”,“author”:“moldovan”,“year”:“2012”,“期刊标题”:“马尔可夫决策过程中的安全探索”},{“key”:“ref30”,“doi asserted by”:“publisher”,“doi”:“10.5244\/C.331.11”},{“key”:“ref37”,“首页”:“481”,“文章标题”:“通过实践和批评建议强化学习”,“volume”:“24”,“author”:“judah”,“year”:“2010”,“journal-title”:“Proc AAAI Conf Artif Intell”},{“key”:”ref36“,”first page“:”12“,”article-title“:”Robot learning from demonstration“,”volume“:“97”,“author”:“atkeson”,“年”:“1997”,“journal-ttitle”:”Proc ICML“}”,{”key“ref35”,“作者”:“abel”,“年份”:“2017”,“新闻标题“:“Agent-Agnostic Human-in-the-Loop强化学习”},{“key”:“ref34”,“first-page”:“1000”,“article-title”:“与人类教师的强化学习:反馈和指导的证据及其对学习绩效的影响”,“volume”:《6》,“author”:“thomaz”,“year”:“2006”,“journal-title》:“Proc AAAI”}:“publisher”,“DOI”:“10.1007\/978-3-642-16108-7_28”},{“key”:“ref40”,“DOI-asserted-by”:“publicher”,“DOI”:”10.1016\/j.tcs.2017.11.020“}”,{”key“:”ref11“,”first page“:key“:”ref12“,”author“:”amodei“,”year“:“2016”,“journal-title”:“ai安全中的具体问题”},{“key”:“ref13”,“first page”:”1437“,“article-title“:“关于安全强化学习的综合调查”,“volume”::“16”,“author”:“garc\u00eda”,“year”:“2015”,“日记标题”:“J Mach Learn Res”}、{“密钥”:“ref14”,“doi-asserted-by”:“publisher”,“doi”:“10.1016\/B978-1-55860-335-6.5002 1-0“},{”键“:“ref15”,“doi断言者”:“publisher”,“doi”:“10.1287\/opre.1050.0216”},{“key”:“ref16”,“首页”:“1327”,“文章标题”:“风险敏感控制的学习算法”,“作者”:“borkar”,“年份”:“2010”,“期刊标题”:“Proc 19th Int Symp Math Theory Netw Syst”},{“key”:“ref17”,“doi断言者”:“publisher”,“doi”:“10.1023\/A:1017940631555”},{“密钥”:“ref18”,“author”:“di castro”,“year”:“2012”,“journal-title”:“具有方差相关风险标准的政策梯度”},{“key”:“ref19”,“doi-sserted-by”:“publisher”,”doi“:”10.1145 \/1015330.1015430 417“,”物品标签“:“汤普森抽样在一般环境下渐近最优”,“author”:“leike”,“year”:“2016”,“journal-title”:“Proc 32nd Int Conf Understance Artif Intell(UAI)”},{“key”:“ref27”,“doi-asserted-by”:“publisher”,“doi”:“10.1016\/B978-1-55860-247-2.50017-6”}_12“},{”键“:“ref6”,“doi-asserted-by”:“publisher”,“doi”:“10.24963\/ijcai.2019\/302”},{“key”:“ref29”,“首页”:“2067”,“article-title”:“无误试验:通过人工干预实现安全强化学习”,“author”:“saunders”,“year”:“2018”,“journal-title“Proc 17th Int Conf Auton Agents MultiAgent Syst”},“DOI”:“10.1007\/978-3-319-11662-4_13”},{“key”:“ref8”,“author”:“leike”,“year”:“2016”,“journal-title”:“non-parameteral general reinforcement learning”}、{“key”:”ref7“,”DOI-asserted-by“:”publisher“,”DOI“:”10.1093\/biomet\/25.3-4.285:“AiXijs A software demo for general reinforcement learning”},{“key”:“ref9”,“first-page”:“1345”,“article-title”:“general reservation learning中的合理性、乐观性和保证性”,“volume”:”16“,“author”:“sunehag”,“year”:“2015”,“journal-title“:”J Mach learning Res“}”,{”key:“ref1”,“doi-asserted-by”:“publisher”,“doi”:“10.1007\/978-3642-24412-49”},{“key”:“ref20”,“首页”:“1449”,“article-title”:“学徒制学习的游戏理论方法”,“author”:“syed”,“year”:“2008”,“journal-title“:”Proc NIPS“},{”key“:”ref45“,”doi-asserted-by“:”publisher“,”doi“:”10.1007\/b138233“}:“模仿学习和结构化预测减少到无更新的在线学习”,“作者”:“罗斯”,“年份”:“2011年”,“新闻标题”:“第14届国际会议论文情报统计”},{“关键”:“参考21”,“首页”:“4565”,“文章标题”:”生成性对抗性模仿学习“,“作者“:”ho“,”年份“:”2016“,”新闻标题“:“Proc-Adv Neural Inf Process Syst”},{“key”:“ref42”,“doi-asserted-by”:“publisher”,“doi”:“10.24963\/ijcai.2017\/194”}、{“key”:《ref24》,“first page”:第143页,“article-title”:“强化学习的安全探索”,“author”:“hans”,“year”:“2008”,“journal-title“:”Proc-ESANN“}”,{”key“:”ref41“,“doi-asserted-by”:“出版商”、“内政部”:“10.1214\/aoms\/11770704456”},{“key”:“ref23”,“article-title”:“学徒学习与强化学习相结合”,“author”:“clouse”,“year”:“1997”},“author”:“garc\u00eda”,“year”:“2013”,“journal-title”:“Proc RLDM”},{“key”:“ref43”,“首页”:“1589”,“article-title“:“应用于Monte-Carlo AI实现的通用折扣函数”,“author”:“lamont”,“年份”:“2017”,“日记标题”:“第16届会议自动代理多代理系统”}:“10.1613\/jair.3761”}],“container-title”:[“IEEE Journal on Selected Areas in Information Theory”],“原始标题”:[],“链接”:[{“URL”:“http://\/xplorestaging.IEEE.org\/ielx7\/8700143\/9459757\/09431093.pdf?arnumber=9431093”,“内容类型”:“unspecified”,“content-version”:“vor”,“intended-application”:“similarity-checking”},“存放”:{“日期部分”用法:[2021,11,8]],“日期时间”:“2021-11-08T22:36:25Z”,“时间戳”:1636410985000},“分数”:1,“资源”:{“主要”:{“URL”:“https:\/\/ieeexplore.ieee.org\/document\/9431093\/”}},”subtitle“:[],”shorttitle“:[],”issued“:{”date-parts“:[2021,6]]},‘references-count’:45,”journal-issue:{“issue”“:”2“},”URL“:”http://\/dx.doi.org\/10.109\/jsait.2021.3079722“,“关系”:{},“ISSN”:[“2641-8770”],“ISSN-type”:[{“值”:“26418-8770”,“类型”:“电子”}],“主题”:[],“发布”:{“日期部分”:[2021,6]]}}}