{“id”:“https://openalex.org/W4284688079“,”doi“:”https://doi.org/10.48550/arxiv.2207.02099“,”title“:“深度离线RL中内隐正则化的实证研究”,“display_name”:“深度脱机RL中的内隐正则性实证研究”、“publication_year”:2022,“publication_date”:“2022-01-01”,“ids”:{“openalex”:“https://openalex.org/W4284688079“,”doi“:”https://doi.org/10.48550/arxiv.2207.02099“},”language“:”en“,”primary_location“:{”is_oa“:true,”landing_page_url“:”https://arxiv.org/abs/2207.02099“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S4306400194“,”display_name“:”arXiv(康奈尔大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is.in_doaj“:false,”is_core“:fase,”host_organization“:”https://openalex.org/I205783295“,”“host_organization_name”:“康奈尔大学”,“host_organization_lineage”:[“https://openalex.org/I205783295“],”host_organization_lineage_names“:[”康奈尔大学“],“type”:“repository”},“license”:“cc-by”,“licence_id”:“https://openalex.org/licenses/cc-by“,”version“:”submittedVersion“,”is_accepted“:false,”is_published“:false},”type“:”preprint“,”type_crossref“:“posted-content”,”indexed_in“:[”arxiv“,”datacite“],”open_access“:{”is_oa“:true,”oa_status“:”green“,”oa_url“:”https://arxiv.org/abs/2207.02099“,”any_repository_has_fulltext“:true},”authorships“:[{”author_position“:”第一“,”作者“:{”id“:”https://openalex.org/A5041145688“,”display_name“:”\u00c7a\u011flar G\u00fcl\u00e7ehre“,”orcid“:”网址:https://orcid.org/0009-0003-4124-1687“},”机构“:[],”国家“:[],”is_correresponsing“:false,”raw_author_name“:”Gulcehre,Caglar“,”raw_affiation_strings“:[],”附属机构“:[]},{”author_position“:”middle“,”author“:{”id“:”https://openalex.org/A5044157219“,”display_name“:”Srivatsan Srinivasan“,”orcid“:”https://orcid.org/0000-0002-6672-4779“},”机构“:[],”国家“:[[],”is_corresponding“:false,”raw_author_name“:”Srinivasan,Srivatsan“,”raw_affiliation_strings“:[],“从属关系”:[]},{“author_position”:“middle”,“author”:{“id”:“https://openalex.org/A5033464007“,”display_name“:”Jakub Sygnowski“,”orcid“:null},”institutions“:[],”countries“:[],”is_corresponding“:false,”raw_author_name“,”Sygnowski-Jakub“,”raw _affiliation_strings“:],”affiliations“:[/]},{”author_position“:”middle“,”author“:{”id“:”https://openalex.org/A5016618555“,”display_name“:”Georg Ostrovski“,”orcid“:null},”institutions“:[],”countries“:[],”is_corresponding“:false,”raw_author_name“:”Ostrovski,Georg“,”raw _affiliation_strings“:],”affiliations“:[/]},{”author_position“:”middle“,”author“:{”id“:”https://openalex.org/A5050499655“,”display_name“:”Mehrdad Farajtabar“,”orcid“:”https://orcid.org/0000-0002-5510-518X“},”机构“:[],”国家“:[[],”is_corresponding“:false,”raw_author_name“:”Farajtabar,Mehrdad“,”raw _ afiliation_strings“:[],“从属关系”:[]},{“author_position”:“middle”,“author”:{“id”:“https://openalex.org/A5079326539“,”display_name“:”Matthew J.Hoffman“,”orcid“:”https://orcid.org/0000-0001-5076-0540“},”机构“:[],”国家“:[[],”is_corresponding“:false,”raw_author_name“:”Hoffman,Matt“,”raw _ afiliation_strings“:[],“从属关系”:[]},{“author_position”:“middle”,“author”:{“id”:“https://openalex.org/A5043910056“,”display_name“:”Razvan Pascanu“,”orcid“:”https://orcid.org/0000-0002-5470-1238“},”机构“:[],”国家“:[],”is_correresponsing“:false,”raw_author_name“:”Pascanu,Razvan“,”raw_affiation_strings“:[],”附属机构“:[]},{”author_position“:”last“,”author“:{”id“:”https://openalex.org/A5091677854“,”display_name“:”Arnaud Doucet“,”orcid“:”https://orcid.org/0000-0002-7662-419X“},”机构“:[],”国家“:[[],”is_corresponding“:false,”raw_author_name“:”Doucet,Arnaud“,”raw _ afiliation_strings“:[]],”附属机构“:[]}],”机构评估“:[],“countries _ distinct_count”:0,“机构区分计数”:0“:null,”fwci“:null,”has_fulltext“:false,”cited_by_count“:0,”citation_normalized_ppercentage“:{“value”:0.0,”is_in_top_1_ppercent“:false,”is_in_top_1_percent“:false},”cited_by_percentle_year“:{“min”:0,”max“:62},”biblio“:{“volume”:null,”issue“:null,”first_page“:null,”last_page“:null},”is_retracted“:false,”is_paratext“:false,”primary_topic“:{“id”:”https://openalex.org/T10462“,”display_name“:”强化学习算法“,”score“:0.997,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},”主题“:[{”id“:”https://openalex.org/T10462“,”display_name“:”强化学习算法“,”score“:0.997,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},{”id“:”https://openalex.org/T12676“,”display_name“:”极限学习机器的理论与应用“,”score“:0.9877,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},{”id“:”https://openalex.org/T11612“,”display_name“:”机器学习中的优化方法“,”score“:0.9864,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}}],”关键词“:[{”id“:”https://openalex.org/keywords/hyperparameter网站“,”display_name“:”Hyperparameter“,”score“:0.73446244},{”id“:”https://openalex.org/keywords/regulation网站“,”display_name“:”正则化(语言学)“,”score“:0.6328303},{”id“:”https://openalex.org/keywords/rank(https://openalex.org/keywords/rank)“,”display_name“:”Rank(图论)“,”score“:0.61224425},{”id“:”https://openalex.org/keywords/reinforcement-learning网站“,”display_name“:”强化学习“,”score“:0.580182},{”id“:”https://openalex.org/keywords/dep-learning网站“,”display_name“:”深度学习“,”score“:0.531084},{”id“:”https://openalex.org/keywords/incremental-learning网站“,”display_name“:”增量学习“,”score“:0.518872},{”id“:”https://openalex.org/keywords/regression网站“,”display_name“:”回归“,”score“:0.51807},{”id“:”https://openalex.org/keywords/online-sequential-learning网站“,”display_name“:”在线顺序学习“,”score“:0.505799},{”id“:”https://openalex.org/keywords/bootstrapping(https://openalex.org/keywords/bootstrapping)“,”display_name“:”Bootstrapping(finance)“,”score“:0.4670025},{”id“:”https://openalex.org/keywords/learning-to-rank“,”display_name“:”Learning to rank“,”score“:0.42450356}],”concepts“:[{”id“:”https://openalex.org/C8642999,“wikidata”:https://www.wikidata.org/wiki/Q4171168“,”display_name“:”Hyperparameter“,”level“:2,”score“:0.73446244},{”id“:”https://openalex.org/C41008148,“wikidata”:https://www.wikidata.org/wiki/Q21198“,”display_name“:”计算机科学“,”level“:0,”score“:0.69543886},{”id“:”https://openalex.org/C2776135515,“wikidata”:https://www.wikidata.org/wiki/Q17143721“,”display_name“:”正则化(语言学)“,”level“:2,”score“:0.6328303},{”id“:”https://openalex.org/C164226766,“wikidata”:https://www.wikidata.org/wiki/Q7293202“,”display_name“:”Rank(图论)“,”level“:2,”score“:0.61224425},{”id“:”https://openalex.org/C97541855,“wikidata”:https://www.wikidata.org/wiki/Q830687“,”display_name“:”强化学习“,”level“:2,”score“:0.611698},{”id“:”https://openalex.org/C154945302,“wikidata”:https://www.wikidata.org/wiki/Q11660“,”display_name“:”人工智能“,”level“:1,”score“:0.5789657},{”id“:”https://openalex.org/C50644808,“wikidata”:https://www.wikidata.org/wiki/Q192776“,”display_name“:”人工神经网络“,”level“:2,”score“:0.5471902},{”id“:”https://openalex.org/C119857082,“wikidata”:https://www.wikidata.org/wiki/Q2539“,”display_name“:”机器学习“,”level“:1,”score“:0.5097284},{”id“:”https://openalex.org/C207609745,“wikidata”:https://www.wikidata.org/wiki/Q4944086“,”display_name“:”Bootstrapping(finance)“,”level“:2,”score“:0.4670025},{”id“:”https://openalex.org/C86037889,“wikidata”:https://www.wikidata.org/wiki/Q4330127“,”display_name“:”Learning to rank“,”level“:3,”score“:0.42450356},{”id“:”https://openalex.org/C149782125,“wikidata”:https://www.wikidata.org/wiki/Q160039“,”display_name“:”计量经济学“,”level“:1,”score“:0.24477127},{”id“:”https://openalex.org/C33923547,“wikidata”:https://www.wikidata.org/wiki/Q395“,”display_name“:”数学“,”等级“:0,”分数“:0.19295424},{”id“:”https://openalex.org/C189430467,“wikidata”:https://www.wikidata.org/wiki/Q7293293“,”display_name“:”排名(信息检索)“,”级别“:2,”分数“:0.1274459},{”id“:”https://openalex.org/C114614502,“wikidata”:https://www.wikidata.org/wiki/Q76592“,”display_name“:”Combinatorics“,”level“:1,”score“:0.0}],”mesh“:[],”locations_count“:2,”locations“:[{”is_oa“:true,”landing_page_url“:”https://arxiv.org/abs/207.02099“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S4306400194“,”display_name“:”arXiv(康奈尔大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is.in_doaj“:false,”is_core“:fase,”host_organization“:”https://openalex.org/I205783295“,”“host_organization_name”:“康奈尔大学”,“host_organization_lineage”:[“https://openalex.org/I205783295“],”host_organization_lineage_names“:[”康奈尔大学“],“type”:“repository”},“license”:“cc-by”,“licence_id”:“https://openalex.org/licenses/cc-by“,”version“:”submittedVersion“,”is_accepted“:false,”is_published“:false},{”is_oa“:false,”landing_page_url“:”https://api.datacite.org/dois/10.48550/arxiv.2207.02099“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S4393179698“,”display_name“:”DataCite API“,”issn_l“:null,”issn“:null,”is_oa“:true,”is_ in_doaj“:false,”is_core“:false,”host_organization“:”https://openalex.org/I4210145204“,”“host_organization_name”:“DataCite”,“host_organization_lineage”:[“https://openalex.org/I4210145204“],”host_organization_lineage_names“:[”DataCite“],“type”:“metadata”},“license”:null,“licence_id”:null,“version”:null}],“best_oa_location”:{“is_oa”:true,“landing_page_url”:“https://arxiv.org/abs/2207.02099“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S4306400194“,”display_name“:”arXiv(康奈尔大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is.in_doaj“:false,”is_core“:fase,”host_organization“:”https://openalex.org/I205783295“,”“host_organization_name”:“康奈尔大学”,“host_organization_lineage”:[“https://openalex.org/I205783295“],”host_organization_lineage_names“:[”康奈尔大学“],“type”:“repository”},“license”:“cc-by”,“licence_id”:“https://openalex.org/licenses/cc-by“,”version“:”submittedVersion“,”is_accepted“:false,”is_published“:false},”sustainable_development_goals“:[{”display_name“:”和平、正义和强大的制度“,”score“:0.41,”id“:”https://metadata.un.org/sdg/16“}],”grants“:[],”datasets“:[],”versions“:[】,”referenced_works_count“:0,”referrenced_works“:],”related_work斯“:[”https://openalex.org/W4390421286","https://openalex.org/W4389724018","https://openalex.org/W4318719684","https://openalex.org/W4281847915","https://openalex.org/W4280563792","https://openalex.org/W3183136280","https://openalex.org/W3117246195","https://openalex.org/W2140186469","https://openalex.org/W156620619","https://openalex.org/W1534274833“],”abstract_inverted_index“:{”Deep“:[0],”neural“:[1,19],”networks“:[2],”are“:[3],”the“:[4,41,44,59,70,83,91118150165171183186198],”most“:[5],”commonly“:[6],”used“:[7],”function“:[8],”approsors“:[9],13],“先前”:[14],“作品”:[15],“有”:[16],“显示”:[17],“that“:[18,31138163176192207],“nets”:[20],“trained”:[21],“with”:[22],“TD-learning”:[23],“and”:[24,94123133147174203205],“gradient”:[25],“descence”:[26],“can”:[27,32],“exhibit”:[28],“implictive”:[29168],“regulation”:[30169],“be”:[33215],”characterized“:[34],“by”:[35],“under-parameterization”:[36],“的”:[37,43,79161167185],“这些“:[38],”网络。“:[39],”具体来说,“:[40],”秩“:[42,93,98122202],”倒数第二位“:[45],”特征“:[46],”层“,”:[47],”也“:[48],”调用“:[49],”\\textit{effective“:50],”rank}“,”:[51],”has“:[52,65],”been“:[53,66],”observed“:[54],”to“:[55,68,73,82181],”急剧“:[56],”崩溃“:[57,64184],”在“:[58],”训练中。“:[60],”In“:[61108],”turn“:[62],”this“:[63109209],”argument“:[67],”reduce“:[69],”model’s“:[71],”能力“:[72],”further“:[74],”adapt“:[75],”later“:[77],”stages“:[78],”learning“:[80],”leading“:[81],”reduced“:[84],”final“:[85],”performance“。“:[86],”这样“:[87],”一个“:[88],”关联“:[89141210],”介于“:[90120200],”有效“:[92,97121187201],”性能“:[95124204],”制造“:[96],”强制“:[99],”对于“:[100104],”RL,“:[102],”主要“:[103],”政策“:[106],”评估。“:[107],“工作”,“:[110],“我们”,“行为”“实验室”:[135],“我们”:[136],“观察”:[137],“直接”:[140],“存在”:[142],“仅”:[143]受限“:[145],”设置“:[146],”消失“:[148],”更多“:[151],”扩展“:[152],”超参数“:[153],”扫描。“:[154],”“此外,”:[155],“经验性”:[157],“识别”:[158],“阶段”:[160],“学习”:[162172],“解释”:[164182],“影响”:[166],“动力学”:[173],“发现”:[175],“引导”:[177],“单独”:[178],“是”:[179],“不足”:[180],“排名”。“:[188],”“进一步”“:[189],”显示“:[191],”几个“:[193],”其他“:[194],”因素“:[195],”可能“:[196214],”混淆“:[197],”关系“:[199],”结论“:[206],”研究“:[208],”根据“:[211],”简单“:[212],”假设“:[213],”高度“:[216],”误导。“:[217]},”cited_by_api_url“:”https://api.openalex.org/works?filter=cites:W4284688079“,”counts_by_year“:[],”updated_date“:”2024-09-20T01:04:16.694261“,”created_date:“2022-07-08”}“