{“id”:“https://openalex.org/W4305028652“,”doi“:”https://doi.org/10.48550/arxiv.2210.05355“,”title“:”低等级奖励的多用户强化学习“,”display_name“:”高等级奖励的多名用户强化学习”,“publication_year”:2022,“publitation_date”:“2022-01-01”,“ids”:{“openalex”:“https://openalex.org/W4305028652“,”doi“:”https://doi.org/10.48550/arxiv.2210.05355“},”language“:”en“,”primary_location“:{”is_oa“:true,”landing_page_url“:”https://arxiv.org/abs/2210.05355“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S4306400194“,”display_name“:”arXiv(康奈尔大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is_ in_doaj“:false,”host_organization“:”https://openalex.org/I205783295“,”“host_organization_name”:“康奈尔大学”,“host_organization_lineage”:[“https://openalex.org/I205783295“],”host_organization_lineage_names“:[”康奈尔大学“],“type”:“repository”},“license”:“cc-by”,“licence_id”:“https://openalex.org/licenses/cc-by“,”version“:”submittedVersion“,”is_accepted“:false,”is_published“:false},”type“:”预印本“,”type_crossref“:”发布的内容“,”indexed_in“:[”arxiv“,”datacite“],”open_access“:{”is_oa“:true,”oa_status“:”green“,”oa_url“:”https://arxiv.org/abs/2210.05355“,”any_repository_has_fulltext“:true},”authorships“:[{”author_position“:”第一“,”作者“:{”id“:”https://openalex.org/A5007875487“,”display_name“:”Naman Agarwal“,”orcid“:”https://orcid.org/0000-0003-0320-0238“},”机构“:[],”国家“:[[],”is_corresponding“:false,”raw_author_name“:”Agarwal,Naman“,”raw_affiliation_strings“:[],“从属关系”:[]},{“author_position”:“middle”,“author”:{“id”:“https://openalex.org/A5034432097“,”display_name“:”Pratek Jain“,”orcid“:”https://orcid.org/0000-0002-0162-489X“},”机构“:[],”国家“:[[],”is_corresponding“:false,”raw_author_name“:”Jain,Prateek“,”raw_affiliation_strings“:[],“从属关系”:[]},{“author_position”:“middle”,“author”:{“id”:“https://openalex.org/A5076315188“,”display_name“:”Suhas S Kowshik“,”orcid“:null},”institutions“:[],”countries“:[],”is_corresponding“:false,”raw_author_name“,”Kowshik,Suhas“,”raw _affiliation_strings“:【】,”affiliations“:〔〕},{”author_position“:”middle“,”author“:{”id“:”https://openalex.org/A5063107043“,”display_name“:”Dheeraj Nagaraj“,”orcid“:null},”institutions“:[],”countries“:[],”is_corresponding“:false,”raw_author_name“:”Nagaraj“,”heeraj“,“raw_affiliation_strings”:[]“,”afliations“:【】},{”author_position“:”last“,”author“:{”id“:”https://openalex.org/A5007012613“,”display_name“:”Praneeth Netrapalli“,”orcid“:null},”institutions“:[],”countries“:[],”is_corresponding“:false,”raw_author_name“:”Netrapally,Praneethe“,”raw _affiliation_strings“:],”affiliations“:【】}],“countries_distinact_count”:0,“institutions_distiction_count“:0,”corresponding_author_ids“:【],”corresponding_institution_ids,“apc_list”:空,“apc_payd“:null,”fwci“:null,”has_fulltext“:false,”cited_by_count“:0,”cited_by_percentile_year“:{”min“:0”max“:67},”biblio“:{volume“:null,”issue“:nul,”first_page“:null},“last_page”:null{,”is_retracted“:false,”is_paratext“:false,”primary_topic“:”{“id”:“https://openalex.org/T12101“,”“display_name”:“多臂盗贼问题的优化”,“score”:0.9879,“subfield”:{“id”:“https://openalex.org/subfields/1803“,”display_name“:”管理科学与运筹学“},”字段“:{”id“:”https://openalex.org/fields/18“,”display_name“:”Decision Sciences“},”domain“:{”id“:”https://openalex.org/domains/2“,”display_name“:”社会科学“}},”主题“:[{”id“:”https://openalex.org/T12101“,”“display_name”:“多臂盗贼问题的优化”,“score”:0.9879,“subfield”:{“id”:“https://openalex.org/subfields/1803“,”display_name“:”管理科学与运筹学“},”字段“:{”id“:”https://openalex.org/fields/18“,”display_name“:”Decision Sciences“},”domain“:{”id“:”https://openalex.org/domains/2“,”“display_name”:“社会科学”}},{”id“:”https://openalex.org/T10603“,”display_name“:”智能电网中的需求响应“,”score“:0.9631,”subfield“:{”id“:”https://openalex.org/subfields/2208“,”display_name“:”电气与电子工程“},”字段“:{”id“:”https://openalex.org/fields/22“,”display_name“:”Engineering“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},{”id“:”https://openalex.org/T11182“,”display_name“:”拍卖和采购合同中的机制设计“,”score“:0.958,”subfield“:{”id“:”https://openalex.org/subfields/1803“,”display_name“:”管理科学与运筹学“},”字段“:{”id“:”https://openalex.org/fields/18“,”display_name“:”Decision Sciences“},”domain“:{”id“:”https://openalex.org/domains/2“,”display_name“:”社会科学“}}],”关键词“:[{”id“:”https://openalex.org/keywords/在线学习“,”display_name“:”在线学习“,”score“:0.491609}],”concepts“:[{”id“:”https://openalex.org/C2778445095,“wikidata”:https://www.wikidata.org/wiki/Q18354077“,”display_name“:”示例复杂性“,”level“:2,”score“:0.800472},{”id“:”https://openalex.org/C97541855,“wikidata”:https://www.wikidata.org/wiki/Q830687网址“,”display_name“:”强化学习“,”level“:2,”score“:0.79567504},{”id“:”https://openalex.org/C164226766,“wikidata”:https://www.wikidata.org/wiki/Q7293202“,”display_name“:”Rank(图论)“,”level“:2,”score“:0.7694448},{”id“:”https://openalex.org/C41008148,“wikidata”:https://www.wikidata.org/wiki/Q21198“,”display_name“:”计算机科学“,”level“:0,”score“:0.72370857},{”id“:”https://openalex.org/C72434380,“wikidata”:https://www.wikidata.org/wiki/Q230930“,”display_name“:”状态空间“,”level“:2,”score“:0.6064229},{”id“:”https://openalex.org/C2778572836,“wikidata”:https://www.wikidata.org/wiki/Q380933网址“,”display_name“:”空格(标点符号)“,”level“:2,”score“:0.5870517},{”id“:”https://openalex.org/C198531522,“wikidata”:https://www.wikidata.org/wiki/Q485146“,”display_name“:”Sample(material)“,”level“:2,”score“:0.54048336},{”id“:”https://openalex.org/C2777027219,“wikidata”:https://www.wikidata.org/wiki/Q1284190“,”display_name“:”常量(计算机编程)“,”level“:2,”score“:0.48902115},{”id“:”https://openalex.org/C26517878,“wikidata”:https://www.wikidata.org/wiki/Q228039“,”display_name“:”Key(lock)“,”level“:2,”score“:0.47398606},{”id“:”https://openalex.org/C48103436,“wikidata”:https://www.wikidata.org/wiki/Q599031“,”display_name“:”State(computer science)“,”level“:2,”score“:0.47170544},{”id“:”https://openalex.org/C2780791683,“wikidata”:https://www.wikidata.org/wiki/Q846785“,”display_name“:”Action(physical)“,”level“:2,”score“:0.4555612},{”id“:”https://openalex.org/C21569690,“wikidata”:https://www.wikidata.org/wiki/Q94702“,”display_name“:”协作筛选“,”level“:3,”score“:0.41790885},{”id“:”https://openalex.org/C11335779,“wikidata”:https://www.wikidata.org/wiki/Q3454686“,”display_name“:”Reduction(mathematics)“,”level“:2,”score“:0.41690397},{”id“:”https://openalex.org/C151376022,“wikidata”:https://www.wikidata.org/wiki/Q168698“,”display_name“:”指数函数“,”level“:2,”score“:0.41466945},{”id“:”https://openalex.org/C119857082,“wikidata”:https://www.wikidata.org/wiki/Q2539“,”display_name“:”机器学习“,”level“:1,”score“:0.35235935},{”id“:”https://openalex.org/C154945302,“wikidata”:https://www.wikidata.org/wiki/Q11660“,”display_name“:”人工智能“,”level“:1,”score“:0.34640226},{”id“:”https://openalex.org/C557471498,“wikidata”:https://www.wikidata.org/wiki/Q554950“,”display_name“:”推荐系统“,”level“:2,”score“:0.27622414},{”id“:”https://openalex.org/C11413529,“wikidata”:https://www.wikidata.org/wiki/Q8366“,”display_name“:”Algorithm“,”level“:1,”score“:0.22374228},{”id“:”https://openalex.org/C33923547,“wikidata”:https://www.wikidata.org/wiki/Q395“,”display_name“:”数学“,”等级“:0,”分数“:0.16415685},{”id“:”https://openalex.org/C105795698,“wikidata”:https://www.wikidata.org/wiki/Q12483“,”display_name“:”Statistics“,”level“:1,”score“:0.13809413},{”id“:”https://openalex.org/C134306372,“wikidata”:https://www.wikidata.org/wiki/Q7754“,”display_name“:”数学分析“,”level“:1,”score“:0.0},{”id“:”https://openalex.org/C185592680,“wikidata”:https://www.wikidata.org/wiki/Q2329“,”display_name“:”Chemistry“,”level“:0,”score“:0.0},{”id“:”https://openalex.org/C38652104,“wikidata”:https://www.wikidata.org/wiki/Q3510521“,”display_name“:”计算机安全“,”level“:1,”score“:0.0},{”id“:”https://openalex.org/C43617362,“wikidata”:https://www.wikidata.org/wiki/Q170050“,”display_name“:”色谱“,”level“:1,”score“:0.0},{”id“:”https://openalex.org/C62520636,“wikidata”:https://www.wikidata.org/wiki/Q944网址“,”display_name“:”量子力学“,”level“:1,”score“:0.0},{”id“:”https://openalex.org/C199360897,“wikidata”:https://www.wikidata.org/wiki/Q9143“,”display_name“:”编程语言“,”level“:1,”score“:0.0},{”id“:”https://openalex.org/C111919701,“wikidata”:https://www.wikidata.org/wiki/Q9135“,”display_name“:”操作系统“,”level“:1,”score“:0.0},{”id“:”https://openalex.org/C2524010,“wikidata”:https://www.wikidata.org/wiki/Q8087“,”display_name“:”Geometry“,”level“:1,”score“:0.0},{”id“:”https://openalex.org/C121332964,“wikidata”:https://www.wikidata.org/wiki/Q413“,”display_name“:”物理“,”级别“:0,”分数“:0.0},{”id“:”https://openalex.org/C114614502,“wikidata”:https://www.wikidata.org/wiki/Q76592“,”display_name“:”组合数学“,”level“:1,”score“:0.0}],”mesh“:[],”locations_count“:3,”location“:[{”is_oa“:true,”landing_page_url“:”https://arxiv.org/abs/2210.05355“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S4306400194“,”display_name“:”arXiv(康奈尔大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is_ in_doaj“:false,”host_organization“:”https://openalex.org/I205783295“,”“host_organization_name”:“康奈尔大学”,“host_organization_lineage”:[“https://openalex.org/I205783295“],”host_organization_lineage_names“:[”康奈尔大学“],“type”:“repository”},“license”:“cc-by”,“licence_id”:“https://openalex.org/licenses/cc-by“,”version“:”submittedVersion“,”is_accepted“:false,”is_published“:false},{”is_oa“:true,”landing_page_url“:”http://arxiv.org/abs/2210.05355,“pdf_url”:http://arxiv.org/pdf/2210.05355,“源”:{“id”:https://openalex.org/S4306400194“,”display_name“:”arXiv(康奈尔大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is_ in_doaj“:false,”host_organization“:”https://openalex.org/I205783295“,”“host_organization_name”:“康奈尔大学”,“host_organization_lineage”:[“https://openalex.org/I205783295“],”host_organization_lineage_names“:[”Cornell University“],“type”:“repository”},“license”:null,“licence_id”:null,“version”:“submittedVersion”,“is_accepted”:false,“is_published”:false},{“is_oa”:false,“landing_page_url”:“https://api.datacite.org/dois/10.48550/arxiv.2210.05355“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S4393179698“,”display_name“:”DataCite API“,”issn_l“:null,”issn“:null,”is_oa“:true,”is_ in_doaj“:false,”host_organization“:”https://openalex.org/I4210145204“,”“host_organization_name”:“DataCite”,“host_organization_lineage”:[“https://openalex.org/I4210145204“],”host_organization_lineage_names“:[”DataCite“],“type”:“metadata”},“license”:null,“licence_id”:null,“version”:null}],“best_oa_location”:{“is_oa”:true,“landing_page_url”:“https://arxiv.org/abs/2210.05355“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S4306400194“,”display_name“:”arXiv(康奈尔大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is_ in_doaj“:false,”host_organization“:”https://openalex.org/I205783295“,”“host_organization_name”:“康奈尔大学”,“host_organization_lineage”:[“https://openalex.org/I205783295“],”host_organization_lineage_names“:[”康奈尔大学“],“type”:“repository”},“license”:“cc-by”,“licence_id”:“https://openalex.org/licenses/cc-by“,”version“:”submittedVersion“,”is_accepted“:false,”is_published“:false},”sustainable_development_goals“:[],”grants“:[],”datasets“:],”versions“:[https://openalex.org/W4386738330","https://openalex.org/W4310614650","https://openalex.org/W4297873223","https://openalex.org/W4225571923","https://openalex.org/W3212257828","https://openalex.org/W2877093712","https://openalex.org/W2398165842","https://openalex.org/W2350784623","https://openalex.org/W2126211886","https://openalex.org/W2116157560“],”ngrams_url“:”https://api.openalex.org/works/W4305028652/ngrams网站“,”“abstract_inverted_index”:{“In”:[0,12],“this”:[1,13],“work”:[2],“we”:[3,64],“consider”:[4],“the”:[5,20,32,35,39,54,60,74,78117121129132140146],“problem”:[6],“of”:[7,38131],“collaborative”:[8,56],“multi-user”:/9],“reinforcement”:[10],“learning.”:[11],“setting”:[14,58],“there“:[15],”are“:[16],”multiple“:[17],”users“:[18,41],”带有“:[19,28,67,94],“相同”:[21],“状态-动作”:[22],“空间”:[23],“和”:[24,49,98109116],“过渡”:[25],“概率”:[26],“但”:[27],“不同”:[29],“奖励”。“:[30],”Under“:[31],”assumption“:[33,52],”that“:[34,76],”reward“:[36],”matrix“:[37],”$N$“:[40,95113],”has“:[42],”a“:[43,47],”low rank“:[44],”structure“:[45],”--“:[46,59],“standard”:[48147],“actually”:[50],“successful”:[51],“in”:[53103],“offline”:[55],“过滤”:[57],“问题”:[61],“是”:[62,87114119],“可以”:[63,99],“设计”:[65],“算法“:[66],”显著“:[68],”较低“:[69],”样本“:[70122],”复杂性“:[71123],”比较“:[72144],”to“:[73145],”ones“:[75],”学习“:[77100],”MDP“:[79125],”单独“:[80],”for“:[81],”each“:[82],”user。“:[83],“我们的”:[84],“主要”:[85],“贡献”:[86],“一个”:[88136],“算法”:[89],“其中”:[90134],“探索”:[91],“奖励”:[92101],“协作”:[93],“特定用户”:[96],“MDP”:[97108],“高效”:[102],“两个”:[104],“键”:[105],“设置:”:“:[106],“表格”:[107],“线性”:[110],“MDP。“:[111],”When“:[112],”large“:[115],”rank“:[118],”constant“,:[120],”per“:[124],”depends“:[126],”对数“:[127],”over“:[128],”size“:[130],”state-space“:[133],”represents“:%135],”index“:[137],”reducation“:[138],”(in“:[139],”state-space“:141],”大小)“:[142],”When“:%143],“`非协作'”:[148],“算法。“:[149]},”cited_by_api_url“:”https://api.openalex.org/works?filter=cites:W4305028652“,”counts_by_year“:[],”updated_date“:”2024-06-22T07:32:47.419816“,”created_date:“2022-10-14”}“