{
  "status": "ok",
  "message-type": "work",
  "message-version": "1.0.0",
  "message": {
    "indexed": {"date-parts": [[2024, 3, 19]], "date-time": "2024-03-19T12:23:11Z", "timestamp": 1710850991812},
    "reference-count": 36,
    "publisher": "Institute of Electrical and Electronics Engineers (IEEE)",
    "license": [
      {"start": {"date-parts": [[2018, 1, 1]], "date-time": "2018-01-01T00:00:00Z", "timestamp": 1514764800000}, "content-version": "vor", "delay-in-days": 0, "URL": "https://ieeexplore.ieee.org/Xplorehelp/downloads/license-information/OAPA.html"}
    ],
    "funder": [
      {"DOI": "10.13039/501100007129", "name": "Natural Science Foundation of Shandong Province", "doi-asserted-by": "publisher", "award": ["ZR2017PF005"]},
      {"DOI": "10.13039/501100001809", "name": "National Natural Science Foundation of China", "doi-asserted-by": "publisher", "award": ["61873138", "61803218", "61573353", "61533017", "61573205"]}
    ],
    "content-domain": {"domain": [], "crossmark-restriction": false},
    "short-container-title": ["IEEE Access"],
    "published-print": {"date-parts": [[2018]]},
    "DOI": "10.1109/access.2018.2878853",
    "type": "journal-article",
    "created": {"date-parts": [[2018, 10, 31]], "date-time": "2018-10-31T19:04:19Z", "timestamp": 1541012659000},
    "page": "70223-70235",
    "source": "Crossref",
    "is-referenced-by-count": 8,
    "title": ["Gradient-Based Reinforcement Learning Algorithm for Multiple Cooperative Agents"],
    "prefix": "10.1109",
    "volume": "6",
    "author": [
      {"ORCID": "http://orcid.org/0000-0002-6615-629X", "authenticated-orcid": false, "given": "Zhen", "family": "Zhang", "sequence": "first", "affiliation": [{"name": "School of Automation, Qingdao University, China"}]},
      {"given": "Dongqing", "family": "Wang", "sequence": "additional", "affiliation": [{"name": "School of Automation, Qingdao University, Qingdao, Shandong, China"}]},
      {"given": "Dongbin", "family": "Zhao", "sequence": "additional", "affiliation": [{"name": "State Key Laboratory of Management and Control for Complex Systems, Institute of Automation, Chinese Academy of Sciences"}]},
      {"given": "Tingting", "family": "Song", "sequence": "additional", "affiliation": [{"name": "School of Automation, Qingdao University, Qingdao, Shandong, China"}]}
    ],
    "member": "263",
    "reference": [
      {"key": "ref33", "doi-asserted-by": "publisher", "DOI": "10.1109/ACCESS.2018.2854283"},
      {"key": "ref32", "first-page": "1789", "article-title": "Collaborative multiagent reinforcement learning by payoff propagation", "volume": "7", "author": "kok", "year": "2006", "journal-title": "J Mach Learn Res"},
      {"key": "ref31", "first-page": "1041", "article-title": "Preprocessing techniques for accelerating the DCOP algorithm ADOPT", "author": "syed", "year": "2005", "journal-title": "Proc AAMAS"},
      {"key": "ref30", "doi-asserted-by": "publisher", "DOI": "10.1007/s10462-015-9447-5"},
      {"key": "ref34", "doi-asserted-by": "publisher", "DOI": "10.1016/S1874-1029(13)60031-2"},
      {"key": "ref10", "doi-asserted-by": "publisher", "DOI": "10.1109/TPWRS.2011.2166091"},
      {"key": "ref11", "doi-asserted-by": "publisher", "DOI": "10.1016/j.neunet.2011.09.005"},
      {"key": "ref12", "doi-asserted-by": "publisher", "DOI": "10.1007/BF00992698"},
      {"key": "ref13", "article-title": "On-line Q-learning using connectionist systems", "author": "rummery", "year": "1994"},
      {"key": "ref14", "doi-asserted-by": "publisher", "DOI": "10.1109/TSMCC.2012.2218595"},
      {"key": "ref15", "doi-asserted-by": "publisher", "DOI": "10.1109/TAC.2009.2037462"},
      {"key": "ref16", "first-page": "878", "article-title": "Multi-agent reinforcement learning using the actor-critic method", "author": "li", "year": "2008", "journal-title": "Proc Int Conf Mach Learn Cybern"},
      {"key": "ref17", "first-page": "541", "article-title": "Nash convergence of gradient dynamics in general-sum games", "author": "singh", "year": "2000", "journal-title": "Proc UAI"},
      {"key": "ref18", "doi-asserted-by": "publisher", "DOI": "10.1016/S0004-3702(02)00121-2"},
      {"doi-asserted-by": "publisher", "DOI": "10.1109/ADPRL.2007.368173"},
      {"key": "ref28", "first-page": "2635", "article-title": "Multi-agent reinforcement learning in common interest and fixed sum stochastic games: An experimental study", "volume": "9", "author": "bab", "year": "2008", "journal-title": "J Mach Learn Res"},
      {"key": "ref4", "doi-asserted-by": "publisher", "DOI": "10.1109/TCYB.2015.2488680"},
      {"key": "ref27", "first-page": "322", "article-title": "Friend-or-foe Q-learning in general-sum games", "author": "littman", "year": "2001", "journal-title": "Proc ICML"},
      {"key": "ref3", "doi-asserted-by": "publisher", "DOI": "10.1016/j.automatica.2012.05.074"},
      {"key": "ref6", "doi-asserted-by": "publisher", "DOI": "10.1109/TSMC.2016.2645699"},
      {"key": "ref29", "first-page": "4878", "article-title": "Multi-agent Q-learning with joint state value approximation", "author": "chen", "year": "2011", "journal-title": "Proc CCC"},
      {"key": "ref5", "first-page": "1039", "article-title": "Nash Q-learning for general-sum stochastic games", "volume": "4", "author": "hu", "year": "2003", "journal-title": "J Mach Learn Res"},
      {"key": "ref8", "doi-asserted-by": "publisher", "DOI": "10.1371/journal.pone.0181747"},
      {"article-title": "Policy gradient methods for reinforcement learning with function approximation", "author": "sutton", "year": "2000", "journal-title": "Advances in Neural Information Processing Systems 12"},
      {"key": "ref1", "doi-asserted-by": "publisher", "DOI": "10.1017/S0269888912000057"},
      {"key": "ref22", "doi-asserted-by": "publisher", "DOI": "10.1613/jair.4818"},
      {"key": "ref21", "doi-asserted-by": "publisher", "DOI": "10.1016/j.artint.2007.01.004"},
      {"key": "ref24", "article-title": "Q-learning in two-player two-action games", "author": "babes", "year": "2009", "journal-title": "Proc AAMAS"},
      {"key": "ref23", "first-page": "1145", "article-title": "Dynamics of Boltzmann Q learning in two-player two-action games", "volume": "85", "author": "kianercy", "year": "2012", "journal-title": "Phys Rev E Stat Phys Plasmas Fluids Relat Interdiscip Top"},
      {"key": "ref26", "first-page": "242", "article-title": "Correlated Q-learning", "author": "greenwald", "year": "2003"},
      {"first-page": "840", "article-title": "FMR-GA: A cooperative multi-agent reinforcement learning algorithm based on gradient ascent", "author": "zhang", "year": "2017", "journal-title": "Proc ICONIP"}
    ],
    "container-title": ["IEEE Access"],
    "original-title": [],
    "link": [
      {"URL": "http://xplorestaging.ieee.org/ielx7/6287639/8274985/08517104.pdf?arnumber=8517104", "content-type": "unspecified", "content-version": "vor", "intended-application": "similarity-checking"}
    ],
    "deposited": {"date-parts": [[2024, 1, 15]], "date-time": "2024-01-15T21:22:23Z", "timestamp": 1705353743000},
    "score": 1,
    "resource": {"primary": {"URL": "https://ieeexplore.ieee.org/document/8517104/"}},
    "subtitle": [],
    "short-title": [],
    "issued": {"date-parts": [[2018]]},
    "references-count": 36,
    "URL": "http://dx.doi.org/10.1109/access.2018.2878853",
    "relation": {},
    "ISSN": ["2169-3536"],
    "issn-type": [{"value": "2169-3536", "type": "electronic"}],
    "subject": [],
    "published": {"date-parts": [[2018]]}
  }
}
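The record above follows the standard Crossref REST API work envelope ({"status", "message-type", "message": {...}}), which the public endpoint https://api.crossref.org/works/<DOI> returns for any registered DOI. As a minimal sketch of re-fetching and spot-checking this record with only the Python standard library (the User-Agent contact address below is a placeholder, per Crossref's "polite pool" convention):

```python
import json
from urllib.request import Request, urlopen

# DOI of the work described in the record above.
DOI = "10.1109/ACCESS.2018.2878853"

req = Request(
    f"https://api.crossref.org/works/{DOI}",
    # Crossref suggests identifying clients with a contact address;
    # "mailto:you@example.org" is a placeholder, not a real contact.
    headers={"User-Agent": "example-client/0.1 (mailto:you@example.org)"},
)

with urlopen(req) as resp:
    work = json.load(resp)["message"]  # unwrap the Crossref envelope

print(work["title"][0])           # article title
print(work["container-title"])    # expected: ["IEEE Access"]
print(work["page"], work["volume"])          # expected: 70223-70235 6
print(work["references-count"])   # expected: 36
```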