{“状态”：“确定”，“消息类型”：“工作”，“信息版本”：“1.0.0”，“邮件”：{“索引”：{“日期部分”：[2022,4,1]]，“日期时间”：“2022-04-01T22:09:25Z”，“时间戳”：1648850965562}，“参考计数”：45，“出版商”：“电气与电子工程师学会（IEEE）”，“问题”：“2”，“资助者”：[{“DOI”：“10.13039\/100014895”，“名称”：“开放慈善项目AI奖学金”，“doi-asserted-by”：“publisher”}，{“doi”：“10.13039\/501100000923”，“name”：“澳大利亚研究委员会发现项目”，“doi-asserted-by”：”publisher“，”award“：[”DP150104590“]}]，“content-domain”：{“domain”:[]，“crossmark-restriction”：false}，“short-container-title”：[”IEEE J.Sel.Areas Inf.Theory“]，“published-print”：{“date-parts”：[[2021,6]]}，“DOI”：“10.1109\/jsait.2021.3079722”，“type”：“journal-article”，“created”：{“date-ports”：[2021,5,14]]，“date-time”：“2021-05-14T19:45:58Z”，“timestamp”：1621021558000}，”page“：”665-677“source”：”Crossref“，”is-referenced-by-count“：0，”title“：[“好奇心杀死了猫或使猫丧失了能力和渐近最优代理”]，“前缀”：“10.1109”，“卷”：“2”，“作者”：[{“ORCID”：“http://\/ORCID.org\/0000-0003-1749-875X”，“authenticated-ORCID”：false，“给定”：“Michael K.”，“family”：“Cohen”，“sequence”：“first”，“affiliation”：[]}，{“给定”:“Elliot”，“家族”：“Catt”，“序列”：“additional”，“从属关系”：[]}，{“ORCID”：“http://\/ORCID.org\/0000-0002-3263-4097”，“authenticated-ORCID”：false，“给定”：“马库斯”，“家族”：“Hutter”，“序列”：“附加”，“从属”：[]}]，“成员”：“263”，“引用”：[{“key”：”ref39“，”doi-asserted-by“：”publisher“，”doi“：”10.1016\/j.tcs.2013.09.022“}”，{”key“：“ref38”，“doi-asserted-by”：“publisher”，“doi”：“10.1609\/aaaai.v34i03.5628”}，{“key”：“ref33”，“author”：“kosoy”，“year”：“2019”，“journal-title”：“委托强化学习学习在稍微有帮助的情况下避免陷阱”}，“期刊标题”：“Proc Adv Neural Inf Process Syst”}，{“key”：“ref31”，“author”：“moldovan”，“year”：“2012”，“期刊标题”：“马尔可夫决策过程中的安全探索”}，{“key”：“ref30”，“doi asserted by”：“publisher”，“doi”：“10.5244\/C.331.11”}，{“key”：“ref37”，“首页”：“481”，“文章标题”：“通过实践和批评建议强化学习”，“volume”：“24”，“author”：“judah”，“year”：“2010”，“journal-title”：“Proc AAAI Conf Artif Intell”}，{“key”：”ref36“，”first page“：”12“，”article-title“：”Robot learning from demonstration“，”volume“：“97”，“author”:“atkeson”，“年”：“1997”，“journal-ttitle”：”Proc ICML“}”，{”key“ref35”，“作者”：“abel”，“年份”：“2017”，“新闻标题“：“Agent-Agnostic Human-in-the-Loop强化学习”}，{“key”：“ref34”，“first-page”：“1000”，“article-title”：“与人类教师的强化学习：反馈和指导的证据及其对学习绩效的影响”，“volume”：《6》，“author”：“thomaz”，“year”：“2006”，“journal-title》：“Proc AAAI”}：“publisher”，“DOI”：“10.1007\/978-3-642-16108-7_28”}，{“key”：“ref40”，“DOI-asserted-by”：“publicher”，“DOI”：”10.1016\/j.tcs.2017.11.020“}”，{”key“：”ref11“，”first page“：key“：”ref12“，”author“：”amodei“，”year“：“2016”，“journal-title”：“ai安全中的具体问题”}，{“key”：“ref13”，“first page”：”1437“，“article-title“：“关于安全强化学习的综合调查”，“volume”：：“16”，“author”：“garc\u00eda”，“year”：“2015”，“日记标题”：“J Mach Learn Res”}、{“密钥”：“ref14”，“doi-asserted-by”：“publisher”，“doi”：“10.1016\/B978-1-55860-335-6.5002 1-0“}，{”键“：“ref15”，“doi断言者”：“publisher”，“doi”：“10.1287\/opre.1050.0216”｝，｛“key”：“ref16”，“首页”：“1327”，“文章标题”：“风险敏感控制的学习算法”，“作者”：“borkar”，“年份”：“2010”，“期刊标题”：“Proc 19th Int Symp Math Theory Netw Syst”｝，｛“key”：“ref17”，“doi断言者”：“publisher”，“doi”：“10.1023\/A:1017940631555”｝，｛“密钥”：“ref18”，“author”：“di castro”，“year”：“2012”，“journal-title”：“具有方差相关风险标准的政策梯度”}，{“key”：“ref19”，“doi-sserted-by”：“publisher”，”doi“：”10.1145 \/1015330.1015430 417“，”物品标签“：“汤普森抽样在一般环境下渐近最优”，“author”：“leike”，“year”：“2016”，“journal-title”：“Proc 32nd Int Conf Understance Artif Intell（UAI）”}，{“key”：“ref27”，“doi-asserted-by”：“publisher”，“doi”：“10.1016\/B978-1-55860-247-2.50017-6”}_12“}，{”键“：“ref6”，“doi-asserted-by”：“publisher”，“doi”：“10.24963\/ijcai.2019\/302”}，{“key”：“ref29”，“首页”：“2067”，“article-title”：“无误试验：通过人工干预实现安全强化学习”，“author”：“saunders”，“year”：“2018”，“journal-title“Proc 17th Int Conf Auton Agents MultiAgent Syst”}，“DOI”：“10.1007\/978-3-319-11662-4_13”}，{“key”：“ref8”，“author”：“leike”，“year”：“2016”，“journal-title”：“non-parameteral general reinforcement learning”}、{“key”：”ref7“，”DOI-asserted-by“：”publisher“，”DOI“：”10.1093\/biomet\/25.3-4.285：“AiXijs A software demo for general reinforcement learning”}，{“key”：“ref9”，“first-page”：“1345”，“article-title”：“general reservation learning中的合理性、乐观性和保证性”，“volume”：”16“，“author”：“sunehag”，“year”：“2015”，“journal-title“：”J Mach learning Res“}”，{”key：“ref1”，“doi-asserted-by”：“publisher”，“doi”：“10.1007\/978-3642-24412-49”}，{“key”：“ref20”，“首页”：“1449”，“article-title”：“学徒制学习的游戏理论方法”，“author”：“syed”，“year”：“2008”，“journal-title“：”Proc NIPS“}，{”key“：”ref45“，”doi-asserted-by“：”publisher“，”doi“：”10.1007\/b138233“}：“模仿学习和结构化预测减少到无更新的在线学习”，“作者”：“罗斯”，“年份”：“2011年”，“新闻标题”：“第14届国际会议论文情报统计”}，{“关键”：“参考21”，“首页”：“4565”，“文章标题”：”生成性对抗性模仿学习“，“作者“：”ho“，”年份“：”2016“，”新闻标题“：“Proc-Adv Neural Inf Process Syst”}，{“key”：“ref42”，“doi-asserted-by”：“publisher”，“doi”：“10.24963\/ijcai.2017\/194”}、{“key”：《ref24》，“first page”：第143页，“article-title”：“强化学习的安全探索”，“author”：“hans”，“year”：“2008”，“journal-title“：”Proc-ESANN“}”，{”key“：”ref41“，“doi-asserted-by”：“出版商”、“内政部”：“10.1214\/aoms\/11770704456”}，{“key”：“ref23”，“article-title”：“学徒学习与强化学习相结合”，“author”：“clouse”，“year”：“1997”}，“author”：“garc\u00eda”，“year”：“2013”，“journal-title”：“Proc RLDM”}，{“key”：“ref43”，“首页”：“1589”，“article-title“：“应用于Monte-Carlo AI实现的通用折扣函数”，“author”：“lamont”，“年份”：“2017”，“日记标题”：“第16届会议自动代理多代理系统”}：“10.1613\/jair.3761”}]，“container-title”：[“IEEE Journal on Selected Areas in Information Theory”]，“原始标题”：[]，“链接”：[{“URL”：“http://\/xplorestaging.IEEE.org\/ielx7\/8700143\/9459757\/09431093.pdf？arnumber=9431093”，“内容类型”：“unspecified”，“content-version”：“vor”，“intended-application”：“similarity-checking”}，“存放”：{“日期部分”用法：[2021,11,8]]，“日期时间”：“2021-11-08T22:36:25Z”，“时间戳”：1636410985000}，“分数”：1，“资源”：{“主要”：{“URL”：“https:\/\/ieeexplore.ieee.org\/document\/9431093\/”}}，”subtitle“：[]，”shorttitle“：[]，”issued“：{”date-parts“：[2021,6]]}，‘references-count’：45，”journal-issue:{“issue”“：”2“}，”URL“：”http://\/dx.doi.org\/10.109\/jsait.2021.3079722“，“关系”：{}，“ISSN”：[“2641-8770”]，“ISSN-type”：[{“值”：“26418-8770”，“类型”：“电子”}]，“主题”：[]，“发布”：{“日期部分”：[2021,6]]}}}