{“id”:“https://openalex.org/W4387963575“,”doi“:”https://doi.org/10.48550/arxiv.2310.16173“,”title“:“关于$\u03b5$-贪婪探索的深度Q-网络的收敛性和样本复杂性分析”,”display_name“:”关于$\u 03b5$贪婪探索的深层Q-网络收敛性和抽样复杂性分析“,”publication_year“:2023,”publiation_date“:”2023-01-01“,”ids“:{”openalex“:”https://openalex.org/W4387963575“,”doi“:”https://doi.org/10.48550/arxiv.2310.16173“},”language“:”en“,”primary_location“:{”is_oa“:true,”landing_page_url“:”https://arxiv.org/abs/2310.16173“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S4306400194“,”display_name“:”arXiv(康奈尔大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is_ in_doaj“:false,”host_organization“:”https://openalex.org/I205783295“,”“host_organization_name”:“康奈尔大学”,“host_organization_lineage”:[“https://openalex.org/I205783295“],”host_organization_lineage_names“:[”康奈尔大学“],“type”:“repository”},“license”:“other-oa”,“licence_id”:“https://openalex.org/licenses/other-oa网站“,”version“:null,”is_accepted“:false,”is_published“:false},”type“:预打印”,”type_crossref“:”journal-article“,”indexed_in“:[”arxiv“,”datacite“],”open_access“:{”is_oa“:true,”oa_status“:”green“,”oa_url“:”https://arxiv.org/abs/2310.16173“,”any_repository_has_fulltext“:true},”authorships“:[{”author_position“:”第一“,”作者“:{”id“:”https://openalex.org/A5007322337“,”display_name“:”帅章“,”兽人“:”https://orcid.org/0000-0002-3119-1992“},”机构“:[],”国家“:[],”is_correresponsing“:false,”raw_author_name“:”张,帅“,”raw_affiation_strings“:[]},{”author_position“:”middle“,”作者“:{”id“:”https://openalex.org/A5081190780“,”display_name“:”Hongkang Li“,”orcid“:null},”institutions“:[],”countries“:[],”is_corresponding“:false,”raw_author_name“:”Li,Hongkan“,”raw _affiliation_string“:[]},{”author_position“:”middle“,”author“:{”id“:”https://openalex.org/A5036726873“,”display_name“:”Meng Wang“,”orcid“:”https://orcid.org/0000-0002-8689-0811“},”机构“:[],”国家“:[[],”is_corresponding“:false,”raw_author_name“:”Wang,Meng“,”raw关联字符串“:[]},{“author_position”:“middle”,“author”:{“id”:“https://openalex.org/A5019685128“,”display_name“:”苗柳“,”兽人“:”https://orcid.org/0000-0002-2281-2689“},”机构“:[],”国家“:[[],”is_corresponding“:false,”raw_author_name“:”刘,苗“,”raw关联字符串“:[]},{“author_position”:“middle”,“author”:{“id”:“https://openalex.org/A5050344371“,”display_name“:”Pin\u2010Yu Chen“,”orcid“:”https://orcid.org/0000-0003-1039-8369“},”机构“:[],”国家“:[[],”is_corresponding“:false,”raw_author_name“:”Chen,Pin-Yu“,”raw关联字符串“:[]},{“author_position”:“middle”,“author”:{“id”:“https://openalex.org/A5088593720“,”display_name“:”宋涛路“,”兽人“:”https://orcid.org/0000-0001-9256-9648“},”机构“:[],”国家“:[[],”is_corresponding“:false,”raw_author_name“:”Lu,Songtao“,”raw关联字符串“:[]},{“author_position”:“middle”,“author”:{“id”:“https://openalex.org/A5002976916“,”display_name“:”Sijia Liu“,”orcid“:”https://orcid.org/0000-0002-9675-3933“},”机构“:[],”国家“:[[],”is_corresponding“:false,”raw_author_name“:”刘,思嘉“,”raw关联字符串“:[]},{“author_position”:“middle”,“author”:{“id”:“https://openalex.org/A5080921991“,”display_name“:”Keerthiram Murugesan“,”orcid“:”https://orcid.org/0000-0002-6948-5240“},”机构“:[],”国家“:[[],”is_corresponding“:false,”raw_author_name“:”Murugesan,Keerthiram“,”raw关联字符串“:[]},{“author_position”:“last”,“author”:{“id”:“https://openalex.org/A5000750466“,”display_name“:”Subhajit Chaudhury“,”orcid“:null},”institutions“:[],”countries“:[],”is_corresponding“:false,”raw_author_name“,”Chaudhuri,Subhajit“,”raw _affiliation_string“:[]}],”countries _distiction_count“:0,”instistictions_disticutes_count”:0,“corresponding_author_ids”:[]“,”correspounding_instition_ids“:[]],”apc_list“:null,“apc_payd”:空,“has_fulltext“:false,”cited_by_count“:0,”cited_by_percentile_year“:{”min“:0”max“:78},”biblio“:{”volume“:null,”issue“:null:”first_page“:null,”last_page“:null},“is_retracted”:false“is_paratext”:false,“primary_topic”:{“id”:“https://openalex.org/T10462“,”display_name“:”强化学习算法“,”score“:0.9973,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”Physical Sciences“}},”topics“:[{”id“:”https://openalex.org/T10462“,”display_name“:”强化学习算法“,”score“:0.9973,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},{”id“:”https://openalex.org/T10502“,”display_name“:”用于神经形态计算的记忆设备“,”score“:0.9921,”subfield“:{”id“:”https://openalex.org/subfields/2208“,”display_name“:”电气与电子工程“},”字段“:{”id“:”https://openalex.org/fields/22“,”display_name“:”Engineering“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},{”id“:”https://openalex.org/T12808“,”display_name“:”低功率纳米应用的铁电器件“,”score“:0.979,”subfield“:{”id“:”https://openalex.org/subfields/2208“,”display_name“:”电气与电子工程“},”字段“:{”id“:”https://openalex.org/fields/22“,”display_name“:”Engineering“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}}],”关键词“:[{”id“:”https://openalex.org/keywords/dep-learning网站“,”display_name“:”深度学习“,”score“:0.523439},{”id“:”https://openalex.org/keywords/reinforcement-learning网站“,”display_name“:”强化学习“,”score“:0.51831}],”concepts“:[{”id“:”https://openalex.org/C2777303404,“wikidata”:https://www.wikidata.org/wiki/Q759757“,”display_name“:”Convergence(economics)“,”level“:2,”score“:0.7975488},{”id“:”https://openalex.org/C2778445095,“wikidata”:https://www.wikidata.org/wiki/Q18354077“,”display_name“:”示例复杂性“,”level“:2,”score“:0.652297},{”id“:”https://openalex.org/C41008148,“wikidata”:https://www.wikidata.org/wiki/Q21198“,”display_name“:”计算机科学“,”level“:0,”score“:0.6491721},{”id“:”https://openalex.org/C97541855,“wikidata”:https://www.wikidata.org/wiki/Q830687“,”display_name“:”强化学习“,”level“:2,”score“:0.59260005},{”id“:”https://openalex.org/C14036430网址,“wikidata”:https://www.wikidata.org/wiki/Q3736076“,”display_name“:”功能(生物学)“,”级别“:2,”分数“:0.58501965},{”id“:”https://openalex.org/C50644808,“wikidata”:https://www.wikidata.org/wiki/Q192776“,”display_name“:”人工神经网络“,”level“:2,”score“:0.5719737},{”id“:”https://openalex.org/C198531522,“wikidata”:https://www.wikidata.org/wiki/Q485146“,”display_name“:”Sample(material)“,”level“:2,”score“:0.52974856},{”id“:”https://openalex.org/C188116033,“wikidata”:https://www.wikidata.org/wiki/Q2664563“,”display_name“:”Q-learning“,”level“:3,”score“:0.5255731},{”id“:”https://openalex.org/C126255220,“wikidata”:https://www.wikidata.org/wiki/Q141495“,”display_name“:”数学优化“,”level“:1,”score“:0.5216965},{”id“:”https://openalex.org/C2776291640,“wikidata”:https://www.wikidata.org/wiki/Q2912517“,”display_name“:”Value(mathematics)“,”level“:2,”score“:0.43360576},{”id“:”https://openalex.org/C139945424,“wikidata”:https://www.wikidata.org/wiki/Q1940696“,”display_name“:”均方误差“,”level“:2,”score“:0.42357144},{”id“:”https://openalex.org/C11413529,“wikidata”:https://www.wikidata.org/wiki/Q8366“,”display_name“:”Algorithm“,”level“:1,”score“:0.3819222},{”id“:”https://openalex.org/C33923547,“wikidata”:https://www.wikidata.org/wiki/Q395“,”display_name“:”数学“,”等级“:0,”分数“:0.37594092},{”id“:”https://openalex.org/C28826006,“wikidata”:https://www.wikidata.org/wiki/Q33521“,”display_name“:”应用数学“,”level“:1,”score“:0.33109123},{”id“:”https://openalex.org/C154945302,“wikidata”:https://www.wikidata.org/wiki/Q11660“,”display_name“:”人工智能“,”level“:1,”score“:0.32775614},{”id“:”https://openalex.org/C119857082,“wikidata”:https://www.wikidata.org/wiki/Q2539“,”display_name“:”机器学习“,”level“:1,”score“:0.24480653},{”id“:”https://openalex.org/C105795698,“wikidata”:https://www.wikidata.org/wiki/Q12483“,”display_name“:”Statistics“,”level“:1,”score“:0.14613473},{”id“:”https://openalex.org/C50522688,“wikidata”:https://www.wikidata.org/wiki/Q189833“,”display_name“:”经济增长“,”level“:1,”score“:0.0},{”id“:”https://openalex.org/C185592680,“wikidata”:https://www.wikidata.org/wiki/Q2329“,”display_name“:”Chemistry“,”level“:0,”score“:0.0},{”id“:”https://openalex.org/C43617362,“wikidata”:https://www.wikidata.org/wiki/Q170050“,”display_name“:”色谱“,”level“:1,”score“:0.0},{”id“:”https://openalex.org/C78458016,“wikidata”:https://www.wikidata.org/wiki/Q840400“,”display_name“:”进化生物学“,”level“:1,”score“:0.0},{”id“:”https://openalex.org/C162324750,“wikidata”:https://www.wikidata.org/wiki/Q8134“,”display_name“:”经济学“,”level“:0,”score“:0.0},{”id“:”https://openalex.org/C86803240,“wikidata”:https://www.wikidata.org/wiki/Q420“,”display_name“:”Biology“,”level“:0,”score“:0.0}],”mesh“:[],”locations_count“:2,”location“:[{”is_oa“:true,”landing_page_url“:”https://arxiv.org/abs/2310.16173“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S4306400194“,”display_name“:”arXiv(康奈尔大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is_ in_doaj“:false,”host_organization“:”https://openalex.org/I205783295“,”“host_organization_name”:“康奈尔大学”,“host_organization_lineage”:[“https://openalex.org/I205783295“],”host_organization_lineage_names“:[”康奈尔大学“],“type”:“repository”},“license”:“other-oa”,“licence_id”:“https://openalex.org/licenses/other-oa网站“,”版本“:null,”is_accepted“:false,”is_published“:false},{”is_oa“:false,”landing_page_url“:”https://api.datacite.org/dois/10.48550/arxiv.2310.16173“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S4393179698“,”display_name“:”DataCite API“,”issn_l“:null,”issn“:null,”is_oa“:true,”is_ in_doaj“:false,”host_organization“:”https://openalex.org/I4210145204“,”“host_organization_name”:“DataCite”,“host_organization_lineage”:[“https://openalex.org/I4210145204“],”host_organization_lineage_names“:[”DataCite“],“type”:“metadata”},“license”:null,“licence_id”:null,“version”:null}],“best_oa_location”:{“is_oa”:true,“landing_page_url”:“https://arxiv.org/abs/2310.16173“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S4306400194“,”display_name“:”arXiv(康奈尔大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is_ in_doaj“:false,”host_organization“:”https://openalex.org/I205783295“,”“host_organization_name”:“康奈尔大学”,“host_organization_lineage”:[“https://openalex.org/I205783295“],”host_organization_lineage_names“:[”康奈尔大学“],“type”:“repository”},“license”:“other-oa”,“licence_id”:“https://openalex.org/licenses/other-oa网站“,”version“:null,”is_accepted“:false,”is_published“:false},”sustainable_development_goals“:[],”grants“:[],”datasets“:【],”versions“:【】,”referenced_works_count“:0,”referrenced_works“:〔〕,”related_work“:[”https://openalex.org/W4294873804","https://openalex.org/W3096874164","https://openalex.org/W3087814763","https://openalex.org/W2937181779","https://openalex.org/W2537866915","https://openalex.org/W2361647908","https://openalex.org/W2357975469","https://openalex.org/W2166117066","https://openalex.org/W2136202932","https://openalex.org/W2089415692“],”ngrams_url“:”https://api.openalex.org/works/W4387963575/ngrams网站“,”“abstract_inverted_index”:{“This”:[0103],“paper”:[1104],“provides”:[2105],“a”:[3,93139160],“theoryal”:[4,27,79108170],“understanding”:[5],“of”:[6,23,65,81114118142148163],“Deep”:%7],“Q-Network”:+8],“(DQN)”:[9],“with”:[10120128],“the”:[11,19,24,32,41,51,54,66,74,77,88106115133146153156],“$\\varepsilon$-贪婪”:[12],“探索“:[13,33],“in”:[14,40,45,72],“deep”:[15],“reinforction”:[16],“learning”。“:[17],”尽管“:[18],”巨大“:[20],”经验主义“:[21],”成就“:[22],”DQN“:[25],”它的“:[26],”特征描述“:[28],“仍然”:[29],“未充分探索。“:[30],”第一,“:[31],”策略“:[34],”是“:[35,99],”或者“:[36],”不切实际“:[37],”或“:[38,86],”忽略“:[39],”现有“:[42,78],”分析。“:[43],”第二,“:[44],”对比度“:[46],”到“:[47,60132],”常规“:[48],”Q学习“:[49],”算法“:[50],”DQN“:[52],”使用“:[53],”目标“:[55],”网络“:[56],”和“:[57110],”经验“:[58],”重放“:[59],”获取“:[61],”an“:[62125],”无偏“:[63],”估计“:[64],“均方根”:[67],“Bellman”:[68],“错误”:[69],“(MSBE)”:[70],“利用“:[71],“训练”:[73],“Q网络”:[75],“然而”:[76],“分析”:[80,85113],“DQNs”:[82119],“缺乏”:[83],“收敛”:[84109149],“绕过”:[87],“技术”:[89],“挑战”:[90],“通过”:[91],“部署”:[92],“显著”:[94],“参数过高”:[95],“神经”:[96],“网络”:],“which”:[98],“not”:[100],“computationally”:[101],“高效。“:[102],”第一“:[107],”样本“:[111],”复杂性“:[112],”实用“:[116],”设置“:[117],”$\\epsilon$-贪婪“:[121],”策略。“:[122],”We“:[123],”prove“:[124],”迭代“:[126],”过程“:[127],”衰减“:[129],”$\\epsilon$“:[130143164],”收敛“:[131],”最优“:[134],”Q值“:[135],”函数“:[136],”几何。“:[137],”“此外,”:[138],“更高”:[140],“级别”:[141162],“值”:[144],“放大”:[145],“区域”:[147],“但是”:[150],“减慢”:[151],“向下”:[152],“收敛”:[154],“while”:[155],“相反”:[157],“保持”:[158],“用于”:[159],“较低”:[161],“数值。“:[165],“实验”:[166],“证明”:[167],“我们的”:[168],“建立的”:[169],“见解”:[171],“关于”:[172],”DQN。“:[173]},”cited_by_api_url“:”https://api.openalex.org/works?filter=cites:W4387963575“,”counts_by_year“:[],”updated_date“:”2024-05-23T10:42:04.007220“,”created_date:“2023-10-27”}“