{“id”:“https://openalex.org/W3195641273“,”doi“:”https://doi.org/10.1016/j.neunet.2021.08.017“,”title“:”正向和反向强化学习共享网络权重和超参数“,”display_name“:”正反向强化学习分享网络权重和超级参数“,“publication_year”:2021,“publiation_date”:“2021-12-01”,“ids”:{“openalex”:“https://openalex.org/W3195641273“,”doi“:”https://doi.org/10.1016/j.neunet.2021.08.017“,”mag“:”3195641273“,”pmid“:”https://pubmed.ncbi.nlm.nih.gov/34492548“},”language“:”en“,”primary_location“:{”is_oa“:true,”landing_page_url“:”https://doi.org/10.1016/j.neunet.2021.08.017“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S123019304“,”display_name“:”Neural networks“,”issn_l“:”0893-6080“,”isn“:[”0893-6080“,”1879-2782“],”is_oa“:false,”is_ in_doaj“:false,”host_organization“:”https://openalex.org/P4310320990“,”“host_organization_name”:“Elsevier BV”,“host_organization_lineage”:[“https://openalex.org/P4310320990“],”host_organization_lineage_names“:[”Elsevier BV“],”type“:”journal“},”license“:”cc by nc nd“,”license_id“:”https://openalex.org/licenses/cc-by-nc-nd“,”version“:”publishedVersion“,”is_accepted“:true,”is_published“:true},”type“:”article“,”type_crossref“:“journal-article”,”indexed_in“:[”arxiv“,”crossref“,”datacite“,”publibmed“],”open_access“:{”is_oa“:true,”oa_status“:”hybrid“,”oa_url“:”https://doi.org/10.1016/j.neunet.2021.08.017“,”any_repository_has_fulltext“:true},”authorships“:[{”author_position“:”first“,”author“:{”id“:”https://openalex.org/A5031054137“,”display_name“:”Eiji Uchibe“,”orcid“:”https://orcid.org/0000-0001-7908-0258},“机构”:[{“id”:https://openalex.org/I4210104143“,”display_name“:”国际高级电信研究所“,”ror“:”https://ror.org/01pe1d703“,”country_code“:”JP“,”type“:”设施“,”沿袭“:[”https://openalex.org/I4210104143“]}],”国家“:[”JP“],”is_corresponding“:true,”raw_author_name“:”Eiji Uchibe“,”raw _affiliation_strings“:[“ATR计算神经科学实验室脑机器人接口部,2-2-2 Hikaridai,Seika-chio,Soraku-gun,Kyoto 619-0288,日本”],“afliations”:[{“raw_affiliation _string”:“ATR计算神经科学实验室脑机器人接口部,2-2-2 Hikaridai,Seika-chio,Soraku-gun,Kyoto 619-0288,Japan“,”institution_ids“:[”https://openalex.org/I4210104143“]}]},{”author_position“:”last“,”author“:{”id“:”https://openalex.org/A5004840638“,”display_name“:”Kenji Doya“,”orcid“:”https://orcid.org/0000-0002-2446-6820},“机构”:[{“id”:https://openalex.org/I142637625“,”display_name“:”冲绳理工大学研究生院“,”ror“:”https://ror.org/02qg15b79“,”country_code“:”JP“,”type“:“教育”,”世系“:[”https://openalex.org/I142637625“]}],”国家“:[”JP“],”is_corresponding“:false,”raw_author_name“:”Kenji Doya“,”raw _ afiliation_strings“:[“冲绳科学技术研究生院神经计算部,1919-1 Tancha,Onna-son,Okinawa 904-0495,Japan”],“afliations”:[{“raw _ ffiliation_strong”:“冲绳科学技术研究生院神经计算部,1919-1 Tancha,Onna-son,Okinawa 904-0495,Japan“,”institution_ids“:[”https://openalex.org/I142637625“]}]}],”countries_disticont_count“:1,”institutions_disticent_count”:2,”corresponding_author_ids“:[”https://openalex.org/A5031054137“],”对应的机构ID“:[”https://openalex.org/I4210104143“],”apc_list“:{”value“:3350,”currency“:”USD“,”value_USD“:33500,”provenance“:”doaj“},”apc _payed“:”{“value”:3350;”currentary“:”美元“,”value_USD”:3380,“provenance”:“doaj”},“fwci”:2.935,“has_fulltext_origin”:“pdf”,“cited_by_count”:14,“citted_by_percentile_year”:{“min”:92,“max”:93},“biblio”:{“volume”:“144”,“issue”:null,“first_page”:“138”last_page“:”153“},”is_retracted“:false,”is_paratext“:fase,”primary_topic“:{”id“:”https://openalex.org/T10462“,”display_name“:”强化学习算法“,”score“:0.9992,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},”主题“:[{”id“:”https://openalex.org/T10462“,”display_name“:”强化学习算法“,”score“:0.9992,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},{”id“:”https://openalex.org/T10581“,”“display_name”:“皮层网络中的神经元振荡”,“score”:0.982,“subfield”:{“id”:“https://openalex.org/subfields/2805“,”display_name“:”认知神经科学“},”字段“:{”id“:”https://openalex.org/fields/28“,”display_name“:”Neuroscience“},”domain“:{”id“:”https://openalex.org/domains/1“,”display_name“:”生命科学“}},{”id“:”https://openalex.org/T12794“,”display_name“:”最优控制的自适应动态规划“,”score“:0.9801,”subfield“:{”id“:”https://openalex.org/subfields/1703“,”display_name“:”计算理论与数学“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”Physical Sciences“}}],”keywords“:[{”id“:”https://openalex.org/keywords/reinforcement-learning网站“,”display_name“:”强化学习“,”score“:0.597818}],”concepts“:[{”id“:”https://openalex.org/C2779803651,“wikidata”:https://www.wikidata.org/wiki/Q5282088“,”display_name“:”Discriminator“,”level“:3,”score“:0.87446994},{”id“:”https://openalex.org/C41008148,“wikidata”:https://www.wikidata.org/wiki/Q21198“,”display_name“:”计算机科学“,”level“:0,”score“:0.62262917},{”id“:”https://openalex.org/C171752962,“wikidata”:https://www.wikidata.org/wiki/Q255166“,”display_name“:”Kullback\u2013Leibler发散“,”level“:2,”score“:0.6157086},{”id“:”https://openalex.org/C97541855,“wikidata”:https://www.wikidata.org/wiki/Q830687“,”display_name“:”强化学习“,”level“:2,”score“:0.60584915},{”id“:”https://openalex.org/C8642999,“wikidata”:https://www.wikidata.org/wiki/Q4171168“,”display_name“:”Hyperparameter“,”level“:2,”score“:0.5891844},{”id“:”https://openalex.org/C154945302,“wikidata”:https://www.wikidata.org/wiki/Q11660“,”display_name“:”人工智能“,”level“:1,”score“:0.50474274},{”id“:”https://openalex.org/C9679016,“wikidata”:https://www.wikidata.org/wiki/Q1417473“,”display_name“:”最大熵原理“,”level“:2,”score“:0.4922289},{”id“:”https://openalex.org/C106301342,“wikidata”:https://www.wikidata.org/wiki/Q4117933“,”display_name“:”熵(时间箭头)“,”level“:2,”score“:0.48672202},{”id“:”https://openalex.org/C11413529,“wikidata”:https://www.wikidata.org/wiki/Q8366“,”display_name“:”Algorithm“,”level“:1,”score“:0.35026163},{”id“:”https://openalex.org/C119857082,“wikidata”:https://www.wikidata.org/wiki/Q2539“,”display_name“:”机器学习“,”level“:1,”score“:0.3491586},{”id“:”https://openalex.org/C126255220,“wikidata”:https://www.wikidata.org/wiki/Q141495“,”display_name“:”数学优化“,”level“:1,”score“:0.3364654},{”id“:”https://openalex.org/C33923547,“wikidata”:https://www.wikidata.org/wiki/Q395“,”display_name“:”数学“,”等级“:0,”分数“:0.29221883},{”id“:”https://openalex.org/C76155785,“wikidata”:https://www.wikidata.org/wiki/Q418“,”display_name“:”Telecommunications“,”level“:1,”score“:0.0},{”id“:”https://openalex.org/C121332964,“wikidata”:https://www.wikidata.org/wiki/Q413“,”display_name“:”物理“,”级别“:0,”分数“:0.0},{”id“:”https://openalex.org/C62520636,“wikidata”:https://www.wikidata.org/wiki/Q944“,”display_name“:”量子力学“,”level“:1,”score“:0.0},{”id“:”https://openalex.org/C94915269,“wikidata”:https://www.wikidata.org/wiki/Q1834857网址“,”display_name“:”Detector“,”level“:2,”score“:0.0}],”mesh“:[],”locations_count“:5,”location“:[{”is_oa“:true,”landing_page_url“:”https://doi.org/10.1016/j.neunet.2021.08.017“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S123019304“,”display_name“:”Neural networks“,”issn_l“:”0893-6080“,”isn“:[”0893-6080“,”1879-2782“],”is_oa“:false,”is_ in_doaj“:false,”host_organization“:”https://openalex.org/P4310320990“,”“host_organization_name”:“Elsevier BV”,“host_organization_lineage”:[“https://openalex.org/P4310320990“],”host_organization_lineage_names“:[”Elsevier BV“],”type“:”journal“},”license“:”cc by nc nd“,”license_id“:”https://openalex.org/licenses/cc-by-nc-nd“,”version“:”publishedVersion“,”is_accepted“:true,”is_published“:true},{”is_oa“:true,”landing_page_url“:”https://arxiv.org/abs/2008.07284,“pdf_url”:https://arxiv.org/pdf/2008.07284,“源”:{“id”:https://openalex.org/S4306400194“,”display_name“:”arXiv(康奈尔大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is_in_doaj“:false,”host_organization“:”https://openalex.org/I205783295“,”“host_organization_name”:“康奈尔大学”,“host_organization_lineage”:[“https://openalex.org/I205783295“],”host_organization_lineage_names“:[”Cornell University“],“type”:“repository”},“license”:null,“licence_id”:null,“version”:“submittedVersion”,“is_accepted”:false,“is_published”:false},{“is_oa”:true,“landing_page_url”:“http://id.nii.ac.jp/1394/00002139/,“pdf_url”:https://oist.repo.nii.ac.jp/?action=repository_action_common_download&item_id=2339&item_no=1&attribute_id=22&file_no=2,“源”:{“id”:https://openalex.org/S4306402452“,”“display_name”:“冲绳科学技术研究生院(冲绳理工研究生院)”,“issn_l”:null,“issn”:nul,“is_oa”:true,“is_ in_doaj”:false,“host_organization”:“https://openalex.org/I142637625“,”“host_organization_name”:“冲绳理工大学研究生院”,“host_ordanization_lineage”:[“https://openalex.org/I142637625“],”host_organization_lineage_names“:[”冲绳理工大学研究生院“],“type”:“repository”},“license”:“cc-by-nc-nd”,“licence_id”:“https://openalex.org/licenses/cc-by-nc-nd“,”version“:”publishedVersion“,”is_accepted“:true,”is_published“:true},{”is_oa“:false,”landing_page_url“:”https://pubmed.ncbi.nlm.nih.gov/34492548“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S4306525036“,”display_name“:”PubMed“,”issn_l“:null,”issn“:null,”is_oa“:false,”is_ in_doaj“:false,”host_organization“:”https://openalex.org/I1299303238“,”“host_organization_name”:“美国国立卫生研究院”,“host_ordanization_lineage”:[“https://openalex.org/I1299303238“],”host_organization_lineage_names“:[”美国国立卫生研究院“],”type“:”repository“},”license“:null,”license_id“:null,”version“:null,”is_accepted“:false,”is_published“:false},{”is_oa“:false,”landing_page_url“:”https://api.datacite.org/dois/10.48550/arxiv.2008.07284“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S4393179698“,”display_name“:”DataCite API“,”issn_l“:null,”issn“:null,”is_oa“:true,”is_ in_doaj“:false,”host_organization“:”https://openalex.org/I4210145204“,”“host_organization_name”:“DataCite”,“host_organization_lineage”:[“https://openalex.org/I4210145204“],”host_organization_lineage_names“:[”DataCite“],“type”:“metadata”},“license”:null,“licence_id”:null,“version”:null}],“best_oa_location”:{“is_oa”:true,“landing_page_url”:“https://doi.org/10.1016/j.neunet.2021.08.017“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S123019304“,”display_name“:”Neural networks“,”issn_l“:”0893-6080“,”isn“:[”0893-6080“,”1879-2782“],”is_oa“:false,”is_ in_doaj“:false,”host_organization“:”https://openalex.org/P4310320990“,”“host_organization_name”:“Elsevier BV”,“host_organization_lineage”:[“https://openalex.org/P4310320990“],”host_organization_lineage_names“:[”Elsevier BV“],“type”:“journal”},“license”:“cc-by-nc-nd”,“licence_id”:“https://openalex.org/licenses/cc-by-nc-nd“,”version“:”publishedVersion“,”is_accepted“:true,”is_published“:true},”sustainable_development_goals“:[{”score“:0.72,”display_name“:”减少不平等“,”id“:”https://metadata.un.org/sdg/10“}],”grants“:[],”datasets“:[],”versions“:[”https://openalex.org/W3195641273“],”referenced_works_count“:48,”referrenced_works“:[”https://openalex.org/W1977655452","https://openalex.org/W1977828796","https://openalex.org/W1994530392","https://openalex.org/W1999874108","https://openalex.org/W2031067035","https://openalex.org/W2068127265","https://openalex.org/W2075323224","https://openalex.org/W2114984060","https://openalex.org/W2119785746","https://openalex.org/W2125612430","https://openalex.org/W2145339207","https://openalex.org/W2158782408","https://openalex.org/W2171302338","https://openalex.org/W2227909145","https://openalex.org/W2466175722","https://openalex.org/W2559655401","https://openalex.org/W2580133360","https://openalex.org/W2591916597","https://openalex.org/W2751530711","https://openalex.org/W2754517384","https://openalex.org/W2765861418","https://openalex.org/W2766447205","https://openalex.org/W2894613947","https://openalex.org/W2900582619","https://openalex.org/W2904246096","https://openalex.org/W2943868761","https://openalex.org/W2946545621","https://openalex.org/W2949206666","https://openalex.org/W2962787969","https://openalex.org/W2962845991","https://openalex.org/W2962901215","https://openalex.org/W2979776030","https://openalex.org/W2981030070","https://openalex.org/W2982316857","https://openalex.org/W2983294627","https://openalex.org/W2983464671","https://openalex.org/W2990460121网址","https://openalex.org/W2996037775","https://openalex.org/W3000681444","https://openalex.org/W3021829440","https://openalex.org/W3036472058","https://openalex.org/W3102709953","https://openalex.org/W3104595455","https://openalex.org/W3117178794","https://openalex.org/W3175558129","https://openalex.org/W4235391064","https://openalex.org/W4394662461","https://openalex.org/W91088564“],”related_works“:[”https://openalex.org/W4380714744","https://openalex.org/W4319453655","https://openalex.org/W4312713068","https://openalex.org/W4293202849","https://openalex.org/W4281847915","https://openalex.org/W2614538623","https://openalex.org/W2387995142","https://openalex.org/W2089959425","https://openalex.org/W1980965563","https://openalex.org/W1489300767“],”ngrams_url“:”https://api.openalex.org/works/W319564273/ngrams“,”abstract_inverted_index“:{”This“:[0],”paper“:[1],”proposes“:[2],”model-free“:[3],”immusition“:[4],”learning“:[5,24],”named“:[6],”Entropy-Regulated“:[7],”Immusition”:[8],“learning”:[9],“(ERIL)”:[10],“that”:[11,94129157173],“minimmusize”:[1217],“the”:[13,27,40,54,58,63,73,79,84,87,95101110118123131163168181],“背面”:[14119132],“Kullback-Leibler“:[15],”(KL)“:[16],”散度。“:[17],“ERIL”:[18158],“combines”:[19],“forward”:[20,59102114],“and”:[2148178],“inverse”:[22,36124],“reinforction”:[23],“(RL)”:[25],“under”:[26],“framework”:[28],“of”:[29,75],“an”:[30139],“entropy-regulated”:[31],“Markov”:[32],“decision”:[33],“process”。“:[34],”安“:[35],”RL“:[37,60115125],”步骤“:[38,61116],”计算“:[39],”对数比率“:[41],”介于“:[42],”两个“:[43,47],”分布“:[44],”by“:[45,57,72,83122],”评估“:[46],”二进制“:[48],”鉴别器。“:[49],”The“:[50,66],”first“:[51],”discriminator“:[52,97],”distinctive“:[53,78],”state“:[55],”generated“:[56,82],”from“:[62,86],”expert‘s“:[64],”state“:[53,78],”state“:[55],”生成“:[56,82],”来源“:[62,86],”专家“:[64],”状态“。“:[65],”second“:[67,96],”discriminator“:[68],”which“:[69104],”is“:[70,93135159],”structured“:[71],”theory“:/74],”entropy“:[76],”regulation“:[77],”state-action-next-state“:[80],”tuples“:[81],”learner“:[85],”expert“:[88],”ones“。“:[89],“一个”:[90],“显著”:[91],“特征”:[92],“共享”:[98],“超参数”:[99],“with”:[100152],“RL,”:[103],“can”:[105],“be”:[106],“used”:[107],“to”:[108137170],“control”:%109],“discriminator’s”:[111],“能力。“:[112],”A“:[113],”KL“:[120133],”估计“:[121182],”步骤。“:[126],“我们”:[127166],“显示”:[128156185],“最小化”:[130],“分歧”:[134],“等价”:[136],“查找”:[138],“最佳”:[140],“策略”。“:[141],“我们的”:[142],“实验”:[143],“结果”:[144],“on”:[145],“MuJoCo-simulated”:[146],“环境”:[147],“基于视觉”:[149],“到达”:[150],“任务”:[151],“a”:[153175],“机器人”:[154],“手臂”:[155],“更多”:[160],“样本效率”:[161],“比”:[162],“基线”:[164],“方法”。“:[165],”应用“:[167],”方法“:[169],”人类“:[171],”行为“:[172],”执行“:[174],”电极平衡“:[176],”任务“:[177],”描述“:[179],”如何“:[180186],”奖励“:[183],”功能“:[184],”每一个“:[187],”主题“:[188],”实现“:[189],”她“:[190],”目标。“:[191]},”cited_by_api_url“:”https://api.openalex.org/works?filter=cites:W3195641273“,”counts_by_year“:[{年:2024,”cited_by_count“:2},{年“:2023,”cited_by_count”:6},“年”:2022,”citecd_by_count“:5},”{“年份”:2021,”citted_by_count“:1}],”updated_date“:”2024-06-21T15:05:37.696278“,”created_dated“日期”:“2021-08-30”}