{“状态”:“确定”,“消息类型”:“工作”,“信息版本”:“1.0.0”,“邮件”:{“索引”:{“日期部分”:[[2024,9,14]],“日期时间”:“2024-09-14T23:34:08Z”,“时间戳”:1726356848611},“参考计数”:29,“出版商”:“电气与电子工程师学会(IEEE)”,“问题”:“11”,“许可证”:[{“开始”:}“日期部分“:[2020,11,1]],”日期时间“:“2020-11-01T00:00:00Z”,“timestamp”:1604188800000},“content-version”:“vor”,“delay-in-days”:0,“URL”:“https:\/\/ieeexplore.iee.org\/Xplorehelp\/downloads\/license-information\/ieee.html”},{“start”:{“date-parts”:[[2020,11,1]],“date-time”:“2020-11-01T00:00 Z”,”timestamp“:16001888000000},”content-versation“stm-asf“,”delay-in-days“:0,”URL“:“https:\/\/doi.org\/10.15223\/policy-029”},{“start”:{“date-parts”:[[2020,11,1]],“date-time”:“2020-11-01T00:00:00Z”,“timestamp”:16041888000000},“content-version”:“stm-asf”,“delay-in-days”:0,“URL”:“http:\/\/doi.org\/10.15223\/policy-037”}],“出资人”:[{“name”:“国际科技合作项目中国”,“奖项”:[“2019YFE0100200”]},{“name”:“清华大学-丰田自动化汽车人工智能技术联合研究中心”}],“content-domain”:{“domain”:[],“crossmark-restriction”:false},“short-container-title”:[“IEEE Trans.Veh.Technol.”],“published-print”:{“date-parts”:[[2020,11]]},”DOI“:”10.1109\/tvt.2020.3026111“,”type“:”“journal-article”,“created”:{-“date-ports”:[[2020,9,23]],“date-time”:“2020-09-23T20:42:34Z”,“timestamp”:1600893754000},“page”:“12597-12608”,“source”:”Crossref“,“is-referenced-by-count”:89,“title”:[“通过近距离政策优化实现交叉口联网和自动化车辆的集中合作”],“prefix”:“10.1109”,“volume”::“69”,“author”:[{“给定”:“Yang”,“family”:“Guan”、“sequence”:“first”,“affiliation”:[]},{“given”:“Yangang”,“family”:“Ren”,“sequences”:“additional”,“filiation“:[]{“ORCID”:”http://\/ORCID.org\/00000-0003-4923-3633“,”authenticated-ORCID“:false,”given“:”Shengbo Eben“,”family“:”Li“,”sequence“:”additional-0002-2664-2509“,”经认证“:false,“given”:“Qi”,“family”:“Sun”,“sequence”:“additional”,“affiliation”:[]},{“givent”:“Laiquan”,“家族”:“Luo”,“sequence”:“additionable”,“filiation“:[]{”ORCID:“http://ORCID.org\/00000-0002-9333-7416”,“authenticated-ORCID”:false、“givend”:“Keqiang”,“家庭”:“Li”,“序列”:“additional”、“affidiation”:“[]}],“member”“:”263“,”reference“:[{”key“:”ref10“,“doi由”:“publisher”断言,“doi”:“10.1109\/TITS.2011.2178836”},{“key”:“ref11”,“doi由”:“publisher”断言,“doi”:“10.1109\/TITS.2014.2354380”},{“key”:“ref12”,“doi由”:“publisher”断言,“doi”:“10.1109\/TITS.2016.2514271”},{“key”:“ref13”,“doi由”:“publisher”断言,“doi”:“10.1109\/IVS”断言。2017.7995727“},{”key“:”ref14“,”doi断言“:“publisher”,“DOI”:“10.1109\/ROBIO.2018.8665334”},{“key”:“ref15”,“DOI-asserted-by”:“publicher”,“DOI”:”10.1109\/ICRA.2018.8460934“},}“key:”ref16“,“article-title”:“Carla:开放式城市驾驶模拟器”,“author”:“dosovitskiy”,“year”:“0”,“journal-title“:”Proc-Conf Robot Learn“}”,{”key“:”ref17“,”首页“:”297“,”文章标题“:“无需依赖标记驾驶数据的自驾驶决策的分层强化学习”,“卷”:“14”,“作者”:“精良”,“年份”:“2019”,“期刊标题”:“IET智能交通系统”},{“密钥”:“ref18”,“doi-asserted-by”:“publisher”,“doi”:“10.1109\/ICRA.2019.8793742”}、{“key”:“ref19”,“article-title”:“结合深度强化学习和基于安全的自动驾驶控制”,“author”:“xiong”,“year”:“2016”,“journal-title”:“arXiv 1612 00147”},{“key”:“ref28”,“article-title“:“Model-ensemble trust-region policy optimization”,“author”:“kurutach”,“year”::“publisher”,“DOI”:“10.1109\/TCST.2019.2908146”},{“key”:“ref27”,“first-page”:“2944”,“article-title”:“通过随机值梯度学习连续控制策略”,“author”:“heess”,“year”:“0”,“journal-title“:”Proc-Adv Neural Inf Process Syst“},”{“密钥”:“ref3”,“DOI-asserted-by”:“publider”,“DOI”:”10.1109\/TVT.2019.2926733“}”,{“键”:“参考6”,“doi-asserted-by”:“publisher”,“doi”:“10.1109\/TMECH.2017.2647987”},{“key”:“ref29”,“article-title”:“基于模型的有效无模型强化学习的价值评估”,“author”:“feinberg”,“year”:“2018”,“journal-title“:”CoRR“},”{“key”:“ref5”,“doiasserted-by“:”publisher“,”doi“:”10.1109\/TIE.2017.27574“}”,{”key“:”参考8“,”doi-asserted-by“:“publisher”,“DOI”:“10.3141\/2381-08”},{“key”:“ref7”,“DOI-asserted-by”:“publicher”,“DOI”:”10.1109\/JIOT.2020.2973977“},“key“:”ref2“,”DOI-assert-by“:”publisher“,”DOI“:”10.109\/TVT.2017.2769084“}.2015.01.007“},{”key“:”ref1“,”DOI-asserted-by“:”publisher“,”DOI“:“10.1109\/TVT.2015.2483779”},{“key”:“ref20”,“first page”:”1057“,“article-title”:“函数逼近强化学习的策略梯度方法”,“author”:“sutton”,“year”:“0”,“journal-title“:“Proc Advances Neural Inf Process Syst”}:“强化学习导论”},{“key”:“ref21”,“first page”:”441“,“article-title”:“自然行动者批判算法中的偏差”,“author”:“thomas”,“year”:“0”,“journal-title“:”Proc Int Conf Mach Learn“},”{“密钥”:“ref24”,“首页”:“1531”,“article-title:”A natural policy gradient“,”author“:”kakade“,”year“:”0“,”journal-title“:“Proc Adv Neural Inf Process Syst”},{“key”:“ref23”,“article-title”:“使用广义优势估计的高维连续控制”,“author”:“schulman”,“year”:“0”,“journal-title“:”Proc Int Conf Learn Representation“},”{“key”:“ref26”,“article-title”:“actor-critic control的在线模型学习算法”,“author”:“grondman”,”year“:“2015”},{“key”:“ref25”,“first-page”:“465”,“article-title”:“Pilco:一种基于模型和数据高效的策略搜索方法”,“author”:“deisenroth”,“year”:“0”,“journal-title“:”Proc 28 Int Conf Mach Learn“}],“container-title:“http://\/xplorestaging.ieee.org\/ielx7\/25\/9258483\/09204585.pdf?arnumber=9204585”,“content-type”:“未指定”,“内容-版本”:“vor”,“intended-application”:“相似性检查”},“存放”:{“日期-部件”:[2022,4,27]],“日期-时间”:“2022-04-27T13:45:03Z”,“时间戳”:1651067103000},”分数“:1,”资源“:{”primary“:{”URL“:“https:\/\/ieeexplore.iee.org\/document\/9204585\/”}},“副标题”:[],“短标题”:[],“发布”:{“日期-部件”:[[2020,11]]}45“,”1939-9359“],”ISSN-type“:[{”value“:”0018-9545“,“type”:”print“},{”value“:”1939-9259“,“type”:“electronic”}],“subject”:[],“published”:{“date-parts”:[[2020,11]]}}