{“id”:“https://openalex.org/W4377101750“,”doi“:”https://doi.org/10.1016/j.jcp.2023.112238“,”title“:“参数化环境下强化学习的随机最大值原理方法”,”display_name“:”参数化环境中强化学习的一种随机最大值原则方法“,”publication_year“:2023,”publitation_date“:”2023-09-01“,”ids“:{”openalex“:”https://openalex.org/W4377101750“,”doi“:”https://doi.org/10.1016/j.jcp.2023.112238“},”language“:”en“,”primary_location“:{”is_oa“:false,”landing_page_url“:”https://doi.org/10.1016/j.jcp.2023.112238“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S148709879“,”display_name“:”计算物理杂志“,”issn_l“:”0021-9991“,”isn“:[”0021-999“,”1090-2716“],”is_oa“:false,”is_ in_doaj“:fase,”host_organization“:”https://openalex.org/P4310320990“,”“host_organization_name”:“Elsevier BV”,“host_organization_lineage”:[“https://openalex.org/P4310320990“],”host_organization_lineage_names“:[”Elsevier BV“],“type”:“journal”},“license”:null,“licence_id”:null,“version”:null,“is_accepted”:false,“is_published”:false},”type“:”article“,”type_crossref“:“jornal-article”,“indexed_in”:[”arxiv“,”crossref“],‘open_access”:{“is_oa”:true,“oa_status”:“green”,“oa_url”:“https://arxiv.org/pdf/2208.02241“,”any_repository_has_fulltext“:true},”authorships“:[{”author_position“:”第一“,”作者“:{”id“:”https://openalex.org/A5043192992“,”display_name“:”R Archibald“,”orcid“:”https://orcid.org/0000-0002-4538-9780},“机构”:[{“id”:https://openalex.org/I1289243028“,”display_name“:”橡树岭国家实验室“,”ror“:”https://ror.org/01qz5mb56“,”country_code“:”US“,”type“:“facility”,”lineage“:[”https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294“]}],”国家“:[”美国“],”is_corresponding“:false,”raw_author_name“:”理查德·阿奇博尔德“,”raw _affiliation_strings“:[“计算机科学与计算科学,橡树岭国家实验室,美国田纳西州橡树岭”]},{“author_position”:“middle”,“author”:{“id”:“https://openalex.org/A5051038295“,”display_name“:”丰宝“,”兽人“:”https://orcid.org/0000-0002-1302-8120},“机构”:[{“id”:https://openalex.org/I103163165“,”display_name“:”佛罗里达州立大学“,”ror“:”https://ror.org/05g3dte14“,”country_code“:”US“,”type“:“教育”,”世系“:[”https://openalex.org/I103163165“]}],”countries“:[”US“],”is_corresponding“:true,”raw_author_name“:”Feng Bao“,”rau_affiliation_strings“:【”佛罗里达州立大学数学系,佛罗里达州塔拉哈西“]},{”author_position“:”last“,”author“:{”id“:”https://openalex.org/A5016246712“,”display_name“:”Jiongmin Yong“,”orcid“:”https://orcid.org/0000-0002-6410-8999},“机构”:[{“id”:https://openalex.org/I106165777“,”display_name“:”中佛罗里达大学“,”ror“:”https://ror.org/036nfer12“,”country_code“:”US“,”type“:“教育”,”世系“:[”https://openalex.org/I106165777“]}],”countries“:[”US“],”is_corresponding“:false,”raw_author_name“:”Jiongmin Yong“,”raw _affiliation_strings“:【”美利坚合众国佛罗里达州奥兰多市中佛罗里达大学数学系“]}】,”contries_distict_count“:1,”institutions_disticent_count“:3,”corresponding_author_ids“:[“”https://openalex.org/A5051038295“],”对应的机构ID“:[”https://openalex.org/I103163165“],”apc_list“:{”value“:3750,”currency“:”USD“,”value_USD“:3750,”出处“:”doaj“},”apc_payed“:{”value“:3750,”currency“:”USD“,”value_USD“:3750,”出处“:”doaj“},”has_fulltext“:false,”cited_by_count“:1,”cited_by_percentle_year“:{”min“:78,”max“:87},”biblio“:{”volume“:”488“,”issue“:null,“first_page”:“112238”,“last_page”:“112238”},“is_retracted“:false,”is_paratext“:fase,”primary_topic“:{”id“:”https://openalex.org/T10462“,”display_name“:”强化学习算法“,”score“:0.998,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},”主题“:[{”id“:”https://openalex.org/T10462“,”display_name“:”强化学习算法“,”score“:0.998,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},{”id“:”https://openalex.org/T12794“,”“display_name”:“最优控制的自适应动态规划”,“score”:0.9728,“subfield”:{“id”:“https://openalex.org/subfields/1703“,”display_name“:”计算理论与数学“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},{”id“:”https://openalex.org/T10848“,”“display_name”:“进化算法中的多目标优化”,“score”:0.9672,“subfield”:{“id”:“https://openalex.org/subfields/1703“,”display_name“:”计算理论与数学“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}}],”关键词“:[{”id“:”https://openalex.org/keywords/reinforcement-learning网站“,”display_name“:”强化学习“,”score“:0.60601},{”id“:”https://openalex.org/keywords/particle-swarmotimization网址“,”display_name“:”粒子群优化“,”score“:0.540251}],”concepts“:[{”id“:”https://openalex.org/C165464430,“wikidata”:https://www.wikidata.org/wiki/Q1570441网址“,”display_name“:”参数化复杂性“,”level“:2,”score“:0.88468575},{”id“:”https://openalex.org/C97541855,“wikidata”:https://www.wikidata.org/wiki/Q830687“,”display_name“:”强化学习“,”level“:2,”score“:0.8420007},{”id“:”https://openalex.org/C2778770139,“wikidata”:https://www.wikidata.org/wiki/Q1966904“,”display_name“:”Solver“,”level“:2,”score“:0.68616927},{”id“:”https://openalex.org/C41008148,“wikidata”:https://www.wikidata.org/wiki/Q21198“,”display_name“:”计算机科学“,”level“:0,”score“:0.6204637},{”id“:”https://openalex.org/C126255220,“wikidata”:https://www.wikidata.org/wiki/Q141495“,”display_name“:”数学优化“,”level“:1,”score“:0.59555066},{”id“:”https://openalex.org/C137631369,“wikidata”:https://www.wikidata.org/wiki/Q7617831“,”display_name“:”随机编程“,”level“:2,”score“:0.4860073},{”id“:”https://openalex.org/C91575142,“wikidata”:https://www.wikidata.org/wiki/Q1971426“,”display_name“:”最优控制“,”level“:2,”score“:0.48066127},{”id“:”https://openalex.org/C37404715,“wikidata”:https://www.wikidata.org/wiki/Q380679“,”display_name“:”动态编程“,”level“:2,”score“:0.4695338},{”id“:”https://openalex.org/C170131372,“wikidata”:https://www.wikidata.org/wiki/Q7617811“,”display_name“:”随机控制“,”level“:3,”score“:0.44039547},{”id“:”https://openalex.org/C2780791683,“wikidata”:https://www.wikidata.org/wiki/Q846785“,”display_name“:”Action(physical)“,”level“:2,”score“:0.42835483},{”id“:”https://openalex.org/C154945302,“wikidata”:https://www.wikidata.org/wiki/Q11660“,”display_name“:”人工智能“,”level“:1,”score“:0.36910963},{”id“:”https://openalex.org/C33923547,“wikidata”:https://www.wikidata.org/wiki/Q395“,”display_name“:”数学“,”等级“:0,”分数“:0.3072884},{”id“:”https://openalex.org/C11413529,“wikidata”:https://www.wikidata.org/wiki/Q8366“,”display_name“:”Algorithm“,”level“:1,”score“:0.24422604},{”id“:”https://openalex.org/C121332964,“wikidata”:https://www.wikidata.org/wiki/Q413“,”display_name“:”物理“,”级别“:0,”分数“:0.0},{”id“:”https://openalex.org/C62520636,“wikidata”:https://www.wikidata.org/wiki/Q944“,”display_name“:”量子力学“,”level“:1,”score“:0.0}],”mesh“:[],”locations_count“:3,”location“:[{”is_oa“:false,”landing_page_url“:”https://doi.org/10.1016/j.jcp.2023.112238“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S148709879“,”display_name“:”计算物理杂志“,”issn_l“:”0021-9991“,”isn“:[”0021-999“,”1090-2716“],”is_oa“:false,”is_ in_doaj“:fase,”host_organization“:”https://openalex.org/P4310320990“,”“host_organization_name”:“Elsevier BV”,“host_organization_lineage”:[“https://openalex.org/P4310320990“],”host_organization_lineage_names“:[”Elsevier BV“],”type“:”journal“},”license“:null,”license_id“:null,”version“:null,”is_accepted“:false,”is_published“:false},{”is_oa“:true,”landing_page_url“:”https://arxiv.org/abs/2208.02241,“pdf_url”:https://arxiv.org/pdf/208.02241,“源”:{“id”:https://openalex.org/S4306400194“,”display_name“:”arXiv(康奈尔大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is_in_doaj“:false,”host_organization“:”https://openalex.org/I205783295“,”“host_organization_name”:“康奈尔大学”,“host_organization_lineage”:[“https://openalex.org/I205783295“],”host_organization_lineage_names“:[”Cornell University“],“type”:“repository”},“license”:null,“licence_id”:null,“version”:“submittedVersion”,“is_accepted”:false,“is_published”:false},{“is_oa”:true,“landing_page_url”:“网址:http://arxiv.org/abs/2208.02241,“pdf_url”:http://arxiv.org/pdf/2208.02241,“源”:{“id”:https://openalex.org/S4306400194“,”display_name“:”arXiv(康奈尔大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is_in_doaj“:false,”host_organization“:”https://openalex.org/I205783295“,”“host_organization_name”:“康奈尔大学”,“host_organization_lineage”:[“https://openalex.org/I205783295“],”host_organization_lineage_names“:[”康奈尔大学“],“type”:“repository”},“license”:null,“licence_id”:null,“version”:“submittedVersion”,“is_accepted”:false,“is_published”:false}],“best_oa_location”:{“is_oa”:true,“landing_page_url”:“https://arxiv.org/abs/2208.02241,“pdf_url”:https://arxiv.org/pdf/2208.02241,“源”:{“id”:https://openalex.org/S4306400194“,”display_name“:”arXiv(康奈尔大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is_in_doaj“:false,”host_organization“:”https://openalex.org/I205783295“,”“host_organization_name”:“康奈尔大学”,“host_organization_lineage”:[“https://openalex.org/I205783295“],”host_organization_lineage_names“:[”康奈尔大学“],“type”:“repository”},“license”:null,“licence_id”:null,“version”:“submittedVersion”,“is_accepted”:false,“is_published”:false},”sustainable_development_goals“:[],“grants”:[]、“datasets”:[],“versions”:[】,“referenced_works_count”:16,“referrenced_works”:https://openalex.org/W1501586228","https://openalex.org/W1980091117","https://openalex.org/W2006718988","https://openalex.org/W2033603136","https://openalex.org/W2095445700","https://openalex.org/W2162733643","https://openalex.org/W2343354150","https://openalex.org/W2554567359","https://openalex.org/W2770316314","https://openalex.org/W2787259794","https://openalex.org/W2964546405","https://openalex.org/W3037975593","https://openalex.org/W3107285692","https://openalex.org/W32403112","https://openalex.org/W4280632784","https://openalex.org/W4362203700“],”related_works“:[”https://openalex.org/W4300052603","https://openalex.org/W3116776104","https://openalex.org/W3048549205","https://openalex.org/W2791142594","https://openalex.org/W2599556035","https://openalex.org/W2086136595","https://openalex.org/W2058868784","https://openalex.org/W175006096","https://openalex.org/W1590308505","https://openalex.org/W1518635954“],”ngrams_url“:”https://api.openalex.org/works/W4377101750/ngrams网站“,”“abstract_inverted_index”:{“In”:[0],“this”:[1],“work”:[2],“we”:[3,39],“introduce”:[4],“a”:[5],“随机”:[6],“最大值”:[7],“原则”:[8121],“(SMP)”:[9],“方法”:[10,92],“for”:[11,63,76,93],“求解”:[12],“the”:[13,18,21,24,34,53,57,61,64,80,90102108117],“强化”:[14,94],“学习”:[15,74,95],“问题”:[16],“与”:[17116],“假设“:[19],”that“:[20,89],”unknowns“:[22],”in“:[23107],”environment“:[25,54],”can“:[26,96],”be“:[27],“parameterized”:[28],”based“:[29122],”on“:[30],”physical“:[31],”knowledge。“:[32],”For“:[33],”development“:[35],”of“:[36],”numerical“:[37],”algorithms,“:[38],”apply“:[40],”an“:[41,70],”effect“:[42],”online“:[43],”parameter“:[44,55],”estimation“:[45],”method“:[46,75],”as“:[47],”ourr“:[48],”exploration“:%49],”technology“:[50],”to“:[51,87”],“估算”:[52],“期间”:[56],“培训”:[58113],“程序”:[59],“和”:[60101],“开发“:[62],“最优”:[65],“政策”:[66,77],“是”:[67],“实现”:[68],“通过”:[69],“高效”:[71],“落后”:[72],“行动”:[73],“改进”:[78],“根据”:[79],“SMP”:[81,91109],“框架。”:[82],“数值”:[83],“实验”:[84],“are”:[85],“呈现”:[86],“演示”:[88],“生产”:[97],“可靠”:[98],“控制”:[99],“策略”:[100],“梯度“:[103],”下降“:[104],”类型“:[105],”优化“:[106],”求解器“:[110],”需要“:[111],”较少“:[112],”剧集“:[114],”比较“:[115],”标准“:[118],”动态“:[119],”编程“:[120],”方法。“:[123]},”cited_by_api_url“:”https://api.openalex.org/works?filter=引用:W4377101750“,”counts_by_year“:[{”年“:2024,”cited_by_count“:1}],”更新日期“:”2024-06-05T17:39:39.735587“,”创建日期“:“2023-05-20”}