{“id”:“https://openalex.org/W4280490439网址“,”doi“:”https://doi.org/10.48550/arxiv.2205.07885“,”title“:“通过优势学习在一般Tsallis熵强化学习中实施KL正则化”,”display_name“:”通过优势学习,在一般Tasllis熵加强学习中实施KL正则化“,”publication_year“:2022,”publiation_date“:”2022-01-01“,”ids“:{”openalex“:”https://openalex.org/W4280490439“,”doi“:”https://doi.org/10.48550/arxiv.2205.07885“},”language“:”en“,”primary_location“:{”is_oa“:true,”landing_page_url“:”https://arxiv.org/abs/2205.07885“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S4306400194“,”display_name“:”arXiv(康奈尔大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is_ in_doaj“:false,”host_organization“:”https://openalex.org/I205783295“,”“host_organization_name”:“康奈尔大学”,“host_organization_lineage”:[“https://openalex.org/I205783295“],”host_organization_lineage_names“:[”康奈尔大学“],“type”:“repository”},“license”:“cc-by”,“licence_id”:“https://openalex.org/licenses/cc-by“,”version“:”submittedVersion“,”is_accepted“:false,”is_published“:false},”type“:”preprint“,”type_crossref“:“posted-content”,”indexed_in“:[”arxiv“,”datacite“],”open_access“:{”is_oa“:true,”oa_status“:”green“,”oa_url“:”https://arxiv.org/abs/2205.07885“,”any_repository_has_fulltext“:true},”authorships“:[{”author_position“:”first“,”author“:{”id“:”https://openalex.org/A5015522949“,”display_name“:”Li Zhu“,”orcid“:”https://orcid.org/0000-0003-2931-0750“},”机构“:[],”国家“:[[],”is_corresponding“:false,”raw_author_name“:”朱,凌伟“,”raw _ afiliation_strings“:[],“affiliations”:[]},{“author_position”:“middle”,“author”:{“id”:“https://openalex.org/A5032769690“,”display_name“:”郑晨“,”兽人“:”https://orcid.org/0000-0001-6353-3697“},”机构“:[],”国家“:[[],”is_corresponding“:false,”raw_author_name“:Chen,Zheng”,“raw_affiliation_strings”:[],“affiliations”:[]},{“author_position”:“middle”,“author”:{“id”:“https://openalex.org/A5031054137“,”display_name“:”Eiji Uchibe“,”orcid“:”https://orcid.org/0000-0001-7908-0258“},”机构“:[],”国家“:[[],”is_corresponding“:false,”raw_author_name“:”Uchibe,Eiji“,”raw_affiliation_strings“:[],“从属关系”:[]},{“author_position”:“last”,“author”:{“id”:“https://openalex.org/A5042074952“,”display_name“:”Takamitu Matsubara“,”orcid“:”https://orcid.org/0000-0003-3545-4814“},”机构“:[],”国家“:[[],”is_corresponding“:false,”raw_author_name“:”松原,高密”,“raw_affiliation_strings”:[],“隶属关系”:[]}],“countries_distiction_count”:0,“institutions_disticent_count“:0,”corresponding_author_ids“:[].”,“correspounding_institution_ids”:[]、“apc_list”:null,“apc_payed”:null,“has_fulltext”:false,“cited_by_count”:0,“cited_by_percentile_year“:{“min”:0,“max”:67},“biblio”:{卷:null,“问题”:nullhttps://openalex.org/T12794“,”“display_name”:“最优控制的自适应动态规划”,“score”:0.9556,“subfield”:{“id”:“https://openalex.org/subfields/1703“,”display_name“:”计算理论与数学“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},”主题“:[{”id“:”https://openalex.org/T12794“,”“display_name”:“最优控制的自适应动态规划”,“score”:0.9556,“subfield”:{“id”:“https://openalex.org/subfields/1703“,”display_name“:”计算理论与数学“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},{”id“:”https://openalex.org/T10462“,”display_name“:”强化学习算法“,”score“:0.9506,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”Physical Sciences“}}],”keywords“:[{”id“:”https://openalex.org/keywords/reinforcement-learning网站“,”display_name“:”强化学习“,”score“:0.561016}],”concepts“:[{”id“:”https://openalex.org/C117521176,“wikidata”:https://www.wikidata.org/wiki/Q7849341“,”display_name“:”Tsallis熵“,”level“:3,”score“:0.8688785},{”id“:”https://openalex.org/C97541855,“wikidata”:https://www.wikidata.org/wiki/Q830687“,”display_name“:”强化学习“,”level“:2,”score“:0.7039372},{”id“:”https://openalex.org/C41008148,“wikidata”:https://www.wikidata.org/wiki/Q21198“,”display_name“:”计算机科学“,”level“:0,”score“:0.56452405},{”id“:”https://openalex.org/C106301342,“wikidata”:https://www.wikidata.org/wiki/Q4117933“,”display_name“:”熵(时间箭头)“,”level“:2,”score“:0.48050177},{”id“:”https://openalex.org/C154945302,“wikidata”:https://www.wikidata.org/wiki/Q11660“,”display_name“:”人工智能“,”level“:1,”score“:0.4005538},{”id“:”https://openalex.org/C33923547,“wikidata”:https://www.wikidata.org/wiki/Q395“,”display_name“:”Mathematics“,”level“:0,”score“:0.34539288},{”id“:”https://openalex.org/C153180895,“wikidata”:https://www.wikidata.org/wiki/Q7148389“,”display_name“:”模式识别(心理学)“,”level“:2,”score“:0.10931489},{”id“:”https://openalex.org/C121332964,“wikidata”:https://www.wikidata.org/wiki/Q413“,”display_name“:”物理“,”等级“:0,”分数“:0.099971235},{”id“:”https://openalex.org/C97355855,“wikidata”:https://www.wikidata.org/wiki/Q11473“,”display_name“:”热力学“,”level“:1,”score“:0.09087178}],”mesh“:[],”locations_count“:2,”location“:[{”is_oa“:true,”landing_page_url“:”https://arxiv.org/abs/2205.07885“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S4306400194“,”display_name“:”arXiv(康奈尔大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is_ in_doaj“:false,”host_organization“:”https://openalex.org/I205783295“,”“host_organization_name”:“康奈尔大学”,“host_organization_lineage”:[“https://openalex.org/I205783295“],”host_organization_lineage_names“:[”康奈尔大学“],“type”:“repository”},“license”:“cc-by”,“licence_id”:“https://openalex.org/licenses/cc-by“,”version“:”submittedVersion“,”is_accepted“:false,”is_published“:false},{”is_oa“:false,”landing_page_url“:”https://api.datacite.org/dois/10.48550/arxiv.2205.07885“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S4393179698“,”display_name“:”DataCite API“,”issn_l“:null,”issn“:null,”is_oa“:true,”is_ in_doaj“:false,”host_organization“:”https://openalex.org/I4210145204“,”“host_organization_name”:“DataCite”,“host_organization_lineage”:[“https://openalex.org/I4210145204“],”host_organization_lineage_names“:[”DataCite“],“type”:“metadata”},“license”:null,“licence_id”:null,“version”:null}],“best_oa_location”:{“is_oa”:true,“landing_page_url”:“https://arxiv.org/abs/2205.07885“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S4306400194“,”display_name“:”arXiv(康奈尔大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is_in_doaj“:false,”host_organization“:”https://openalex.org/I205783295“,”“host_organization_name”:“康奈尔大学”,“host_organization_lineage”:[“https://openalex.org/I205783295“],”host_organization_lineage_names“:[”康奈尔大学“],“type”:“repository”},“license”:“cc-by”,“licence_id”:“https://openalex.org/licenses/cc-by“,”version“:”submittedVersion“,”is_accepted“:false,”is_published“:false},”sustainable_development_goals“:[],”grants“:[],”datasets“:],”versions“:[https://openalex.org/W51653785","https://openalex.org/W4391375266","https://openalex.org/W3002753104","https://openalex.org/W2600246793","https://openalex.org/W2142036596","https://openalex.org/W2077600819","https://openalex.org/W2072657027","https://openalex.org/W2061531152","https://openalex.org/W2007980826","https://openalex.org/W1979597421“],”ngrams_url“:”https://api.openalex.org/works/W4280490439/ngrams“,”abstract_inverted_index“:{“最大值”:[0],”Tsallis“:[1105124],”熵“:[2,24135],”(MTE)“:[3],”框架“:[4],”in“:[5,71],”强化“:[6],”学习“:[7],”has“:[8],”获得“:[9],”流行“:[10],”最近“:[11],”by“:[12,65,74,81,89],”美德“:[13],”of“:[14,46],“its”:[15,41],“flexible”:[16],“modeling”:[17],“choices”:[1],“include”:[19],““:[20,44,52,99],”广泛“:[21],”使用“:[22],”香农“:[23134],”和“:[25,35,56,86],”稀疏“:[26],”熵。“:[27],”“然而,”:[28],“非香农”:[29],“熵”:[30],“遭受”:[31],“来自”:[32],“近似值”:[33],“错误”:[34],“后续”:[36],“表现不佳”:[37],“要么”:[38],“到期”:[39],“至”:[40,61,94,98114131],“敏感度”:[42],“或”:[43],“缺乏”:[45],“封闭形式”:[47],“策略”:[48],“表达式。“:[49],“To”:[50],“improve”:[51118],“权衡”:[53],“between”:[54,84],“flexibility”:[55],“experimental”:[57],“performance”,“:[58],“we”:59],“propose”:[60],“strength”:[62],“their”:[63],“error-robstructure”:[64],“enforcement”:[66],“inclimplict”:[67],“Kullback-Leibler”:[68],“(KL)”:[69],“正规化”:[70],“MTE”:[72100],“积极性”:[73],“蒙乔森”:[75],“DQN“:[76],”(MDQN)。“:[77],“我们”:[78],“做”:[79],“所以”:[80],“绘图”:[82],“连接”:[83],“MDQN”:[85,91],“优势”:[87],“学习”:[88],“其中”:[90],“是”:[92109],“显示”:[93],“失败”:[95],“on”:[96111],“概括”:[97],“框架。”:[101],“The”:[102],“建议”:[103],“方法”:[104],“优势”:[106],“学习”:[107],“(TAL)”:[108],“验证”:[110],“广泛”:[112],“实验”:[113],“不”:[115],“仅”:[116],“显著”:[117],“关于”:[119],“Tsallis-DQN”:[120],“对于”:[121],“各种”:[122],“非封闭形式”:[123],“熵”:[125],“但”:[126],“也”:[127],“展品”:[128],“可比”:[129],“性能”:[130],“最先进”:[132],“最大”:[133],“算法。“:[136]},”cited_by_api_url“:”https://api.openalex.org/works?filter=cites:W4280490439“,”counts_by_year“:[],”updated_date“:”2024-06-15T21:05:27.949790“,”created_date:“2022-05-22”}“