{“id”:“https://openalex.org/W4288054947“,”doi“:”https://doi.org/10.48550/arxiv.2207.12045“,”title“:”定期MDP在线强化学习“,”display_name“:”周期MDP在线增强学习“,“publication_year”:2022,”publication_date“:”2022-01-01“,”ids“:{”openalex“:”https://openalex.org/W4288054947“,”doi“:”https://doi.org/10.48550/arxiv.2207.12045“},”language“:”en“,”primary_location“:{”is_oa“:true,”landing_page_url“:”https://arxiv.org/abs/2207.12045“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S4306400194“,”display_name“:”arXiv(康奈尔大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is_ in_doaj“:false,”host_organization“:”https://openalex.org/I205783295“,”“host_organization_name”:“康奈尔大学”,“host_organization_lineage”:[“https://openalex.org/I205783295“],”host_organization_lineage_names“:[”康奈尔大学“],“type”:“repository”},“license”:“cc-by-sa”,“licence_id”:“https://openalex.org/licenses/cc-bysa“,”version“:”submittedVersion“,”is_accepted“:false,”is_published“:false},”type“:”预印本“,”type_crossref“:”发布的内容“,”indexed_in“:[”arxiv“,”datacite“],”open_access“:{”is_oa“:true,”oa_status“:”green“,”oa_url“:”https://arxiv.org/abs/2207.12045“,”any_repository_has_fulltext“:true},”authorships“:[{”author_position“:”第一“,”作者“:{”id“:”https://openalex.org/A5013672718“,”display_name“:”Ayush Aniket“,”orcid“:null},”institutions“:[],”countries“:[],”is_corresponding“:false,”raw_author_name“:”Aniket,Ayusch“,”raw _affiliation_strings“:],”affiliations“:【】},{”author_position“:”last“,”author“:{”id“:”https://openalex.org/A5086971942“,”display_name“:”Arpan Chattopadhyay“,”orcid“:null},”institutions“:[],”countries“:[],”is_corresponding“:false,”raw_author_name“:”Chattopdhayay,Arpan“,”raw _affiliation_strings“:],”affiliations“:【】}“countries_distiction_count”:0,“institutions_distition_count“:0,”corresponding_author_ids“:【],”correcponding_institution_ids,“apc_list”:空,“apc_payd“:null,”fwci“:null,”has_fulltext“:false,”cited_by_count“:0,”cited_by_percentile_year“:{”min“:0”max“:67},”biblio“:{volume“:null,”issue“:nul,”first_page“:null},“last_page”:null{,”is_retracted“:false,”is_paratext“:false,”primary_topic“:”{“id”:“https://openalex.org/T10462“,”display_name“:”强化学习算法“,”score“:0.9603,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},”主题“:[{”id“:”https://openalex.org/T10462“,”display_name“:”强化学习算法“,”score“:0.9603,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}}],”关键词“:[{”id“:”https://openalex.org/keywords/reinforcement-learning网站“,”display_name“:”强化学习“,”score“:0.603185}],”concepts“:[{”id“:”https://openalex.org/C50817715,“wikidata”:https://www.wikidata.org/wiki/Q79895177“,”display_name“:”遗憾“,”等级“:2,”分数“:0.84445524},{”id“:”https://openalex.org/C97541855,“wikidata”:https://www.wikidata.org/wiki/Q830687“,”display_name“:”强化学习“,”level“:2,”score“:0.8159983},{”id“:”https://openalex.org/C106189395,“wikidata”:https://www.wikidata.org/wiki/Q176789“,”display_name“:”Markov决策过程“,”level“:3,”score“:0.77718794},{”id“:”https://openalex.org/C2776330181,“wikidata”:https://www.wikidata.org/wiki/Q18358244“,”display_name“:”Maximization“,”level“:2,”score“:0.66979545},{”id“:”https://openalex.org/C72434380,“wikidata”:https://www.wikidata.org/wiki/Q230930“,”display_name“:”状态空间“,”level“:2,”score“:0.55576277},{”id“:”https://openalex.org/C126255220,“wikidata”:https://www.wikidata.org/wiki/Q141495“,”display_name“:”数学优化“,”level“:1,”score“:0.49955678},{”id“:”https://openalex.org/C98763669,“wikidata”:https://www.wikidata.org/wiki/Q176645“,”display_name“:”Markov链“,”level“:2,”score“:0.49237835},{”id“:”https://openalex.org/C41008148,“wikidata”:https://www.wikidata.org/wiki/Q21198(网址:https://www.wikidata.org/wiki/Q21198)“,”display_name“:”计算机科学“,”level“:0,”score“:0.46721947},{”id“:”https://openalex.org/C33923547,“wikidata”:https://www.wikidata.org/wiki/Q395“,”display_name“:”数学“,”等级“:0,”分数“:0.45632246},{”id“:”https://openalex.org/C159886148,“wikidata”:https://www.wikidata.org/wiki/Q176645“,”display_name“:”Markov过程“,”level“:2,”score“:0.4226673},{”id“:”https://openalex.org/C159176650,“wikidata”:https://www.wikidata.org/wiki/Q43261“,”display_name“:”Horizon“,”level“:2,”score“:0.4216788},{”id“:”https://openalex.org/C28761237,“wikidata”:https://www.wikidata.org/wiki/Q7805321“,”display_name“:”时间范围“,”level“:2,”score“:0.41379195},{”id“:”https://openalex.org/C154945302,“wikidata”:https://www.wikidata.org/wiki/Q11660“,”display_name“:”人工智能“,”level“:1,”score“:0.2558174},{”id“:”https://openalex.org/C105795698,“wikidata”:https://www.wikidata.org/wiki/Q12483“,”display_name“:”Statistics“,”level“:1,”score“:0.19827393},{”id“:”https://openalex.org/C2524010,“wikidata”:https://www.wikidata.org/wiki/Q8087“,”display_name“:”Geometry“,”level“:1,”score“:0.0}],”mesh“:[],”locations_count“:3,”location“:[{”is_oa“:true,”landing_page_url“:”https://arxiv.org/abs/2207.12045“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S4306400194“,”display_name“:”arXiv(康奈尔大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is_ in_doaj“:false,”host_organization“:”https://openalex.org/I205783295“,”“host_organization_name”:“康奈尔大学”,“host_organization_lineage”:[“https://openalex.org/I205783295“],”host_organization_lineage_names“:[”康奈尔大学“],“type”:“repository”},“license”:“cc-by-sa”,“licence_id”:“https://openalex.org/licenses/cc-bysa“,”version“:”submittedVersion“,”is_accepted“:false,”is_published“:false},{”is_oa“:true,”landing_page_url“:”https://arxiv.org/abs/2303.09629,“pdf_url”:https://arxiv.org/pdf/2303.09629,“源”:{“id”:https://openalex.org/S4306400194“,”display_name“:”arXiv(康奈尔大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is_ in_doaj“:false,”host_organization“:”https://openalex.org/I205783295“,”“host_organization_name”:“康奈尔大学”,“host_organization_lineage”:[“https://openalex.org/I205783295“],”host_organization_lineage_names“:[”Cornell University“],“type”:“repository”},“license”:null,“licence_id”:null,“version”:“submittedVersion”,“is_accepted”:false,“is_published”:false},{“is_oa”:false,“landing_page_url”:“https://api.datacite.org/dois/10.48550/arxiv.2207.12045“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S4393179698“,”display_name“:”DataCite API“,”issn_l“:null,”issn“:null,”is_oa“:true,”is_ in_doaj“:false,”host_organization“:”https://openalex.org/I4210145204“,”“host_organization_name”:“DataCite”,“host_organization_lineage”:[“https://openalex.org/I4210145204“],”host_organization_lineage_names“:[”DataCite“],“type”:“metadata”},“license”:null,“licence_id”:null,“version”:null}],“best_oa_location”:{“is_oa”:true,“landing_page_url”:“https://arxiv.org/abs/2207.12045“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S4306400194“,”display_name“:”arXiv(康奈尔大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is_ in_doaj“:false,”host_organization“:”https://openalex.org/I205783295“,”“host_organization_name”:“康奈尔大学”,“host_organization_lineage”:[“https://openalex.org/I205783295“],”host_organization_lineage_names“:[”康奈尔大学“],“type”:“repository”},“license”:“cc-by-sa”,“licence_id”:“https://openalex.org/licenses/cc-bysa“,”version“:”submittedVersion“,”is_accepted“:false,”is_published“:false},”可持续发展目标“:[{”score“:0.83,”display_name“:”和平、正义和强大的机构“,”id“:”https://metadata.un.org/sdg/16“}],”grants“:[],”datasets“:[],”versions“:[】,”referenced_works_count“:0,”referrenced_works“:],”related_work斯“:[”https://openalex.org/W4388236136","https://openalex.org/W4287863949","https://openalex.org/W4287102143","https://openalex.org/W3182614517","https://openalex.org/W2970347269","https://openalex.org/W2945119207","https://openalex.org/W2906267174","https://openalex.org/W2615656344网址","https://openalex.org/W1850488217","https://openalex.org/W134501823“],”ngrams_url“:”https://api.openalex.org/works/W4288054947/ngrams“,”“abstract_inverted_index”:{“We”:[0,31,59],“study”:[1],“learning”:[2],“in”:[3],“periodic”:[4,51],“Markov”:[5],“Decision”:[6],“Process(MDP)”:[7],“a”:[8,36,50],“special”:[9],“type”:[10],“of”:[11,64,83],“non-stational”:[12],“MDP”:[13,38],“where”:[14],“两者”:[15],“the”:[16,26,33,41,45,62,69,75,81],“state”:[17,42],“transition”:[18],“概率“:[19],”和“:[20,48,71],”奖励“:[21,28],”函数“:[22],”变化“:[23],”周期性“:[24],”低于“:[25],”平均“:[27],”最大化“:[29],“设置。“:[30],”公式“:[32],”问题“:[34],”作为“:[35,72],”静止“:[37],”由“:[39],”增强“:[40],”空格“:[43],”with“:[44,68,74],”period“:[46,70],”index“:[47],”suggest“:[49],”upper“:[52],”confidence“:[53],”bounded“:[54],”reinforcement“:[55],”learning-2“:[56],”(PU CRL2)“:[57],”算法。“:[58],”show“:[60],”that“:[61],”遗憾“:[63],”PUCRL2“:[65],”变化“:[66],”线性“:[67],”次线性“:[3],”地平线“:[76],”长度。“:[77],”数值“:[78],”结果“:[79],”演示“:[80],”功效“:[82],”PUCRL2.“:[84]},”引用_by_api_url“:”https://api.openalex.org/works?filter=cites:W4288054947“,”counts_by_year“:[],”updated_date“:”2024-06-22T22:18:06.118140“,”created_date:“2022-07-28”}“