{“id”:“https://openalex.org/W4292862358“,”doi“:”https://doi.org/10.109/lra.2022.3196139“,”title“:”使用模型和策略的熵正则化进行基于模型的模拟学习“,”display_name“:”利用模型和策略熵正则化的基于模型的模仿学习“,“publication_year”:2022,“publiation_date”:“2022-10-01”,“ids”:{“openalex”:“https://openalex.org/W4292862358“,”doi“:”https://doi.org/10.109/lra.2022.3196139“},”language“:”en“,”primary_location“:{”is_oa“:true,”landing_page_url“:”https://doi.org/10.109/lra.2022.3196139,“pdf_url”:https://ieeexplore.ieee.org/ielx7/7083369/9831196/09849015.pdf,“源”:{“id”:https://openalex.org/S4210169774“,”display_name“:”IEEE robotics&automation letters“,”issn_l“:”2377-3766“,”isn“:[”2377-7766“],”is_oa“:false,”is_ in_doaj“:false,”host_organization“:”https://openalex.org/P4310318808“,”“host_organization_name”:“电气与电子工程师学会”,“host_ordanization_lineage”:[“https://openalex.org/P4310318808“],”host_organization_lineage_names“:[”电气与电子工程师协会“],“type”:“journal”},“license”:“cc-by”,“licence_id”:“https://openalex.org/licenses/cc-by“,”version“:”publishedVersion“,”is_accepted“:true,”is_published“:true},”type“:”article“,”type_crossref“:“journal-article”,”indexed_in“:[”arxiv“,”crossref“,”datacite“],”open_access“:{”is_oa“:true,”oa_status“:”hybrid“,”oa_url“:”https://ieeexplore.ieee.org/ielx7/7083369/9831196/09849015.pdf“,”any_repository_has_fulltext“:true},”authorships“:[{”author_position“:”first“,”author“:{”id“:”https://openalex.org/A5031054137“,”display_name“:”Eiji Uchibe“,”orcid“:”https://orcid.org/0000-0001-7908-0258},“机构”:[{“id”:https://openalex.org/I4210104143“,”display_name“:”国际高级电信研究所“,”ror“:”https://ror.org/01pe1d703“,”country_code“:”JP“,”type“:”设施“,”沿袭“:[”https://openalex.org/I4210104143“]}],”国家“:[”JP“],”is_corresponding“:true,”raw_author_name“:”Eiji Uchibe“,”raw _affiliation_strings“:[“日本京都Soraku-gun ATR计算神经科学实验室脑机器人接口部”],”affiliations“:”脑-机器人接口部,ATR计算神经科学实验室,日本京都Soraku-gun”,“institution_ids”:[“https://openalex.org/I4210104143“]}]}],”countries_disticont_count“:1,”institutions_disticent_count”:1,“corresponding_author_ids”:[“https://openalex.org/A5031054137“],”对应的机构ID“:[”https://openalex.org/I4210104143“],”apc_list“:null,”apc _ paid“:null,”fwci“:0.332,”has_fulltext“:true,”fulltext_origin“:”pdf“,”cited_by_count“:1,”citecd_by_percentile_year“:{”min“:67,”max“:76},”biblio“:{“volume”:“7”,“issue”:“4”,“first_page”:“10922”,“last_page”:”10929“}”,“is_retracted”:false,“is_paratext”:false,“primary_topic”:{“id”:“https://openalex.org/T10462“,”display_name“:”强化学习算法“,”score“:0.9999,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},”主题“:[{”id“:”https://openalex.org/T10462“,”display_name“:”强化学习算法“,”score“:0.9999,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},{”id“:”https://openalex.org/T10653“,”display_name“:”机器人抓取和演示学习“,”score“:0.9988,”subfield“:{”id“:”https://openalex.org/subfields/2207“,”display_name“:”控制与系统工程“},”字段“:{”id“:”https://openalex.org/fields/22“,”display_name“:”Engineering“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},{”id“:”https://openalex.org/T10879“,”“display_name”:“机器人和动物双足运动的生物力学”,“score”:0.9907,“subfield”:{“id”:“https://openalex.org/subfields/2204“,”display_name“:”生物医学工程“},”字段“:{”id“:”https://openalex.org/fields/22“,”display_name“:”Engineering“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”Physical Sciences“}}],”keywords“:[{”id“:”https://openalex.org/keywords/reinforcement-learning网站“,”display_name“:”强化学习“,”score“:0.576775},{”id“:”https://openalex.org/keywords/robot-learning“,”display_name“:”机器人学习“,”score“:0.553286},{”id“:”https://openalex.org/keywords/model-based-learning“,”display_name“:”基于模型的学习“,”score“:0.51276},{”id“:”https://openalex.org/keywords/human机器人协作“,”display_name“:”人类机器人协作“,”score“:0.50503},{”id“:”https://openalex.org/keywords/central-pattern-generators网站“,”display_name“:”中心模式生成器“,”score“:0.504146}],”concepts“:[{”id“:”https://openalex.org/C2779803651,“wikidata”:https://www.wikidata.org/wiki/Q5282088“,”display_name“:”Discriminator“,”level“:3,”score“:0.8605387},{”id“:”https://openalex.org/C97541855,“wikidata”:https://www.wikidata.org/wiki/Q830687“,”display_name“:”强化学习“,”level“:2,”score“:0.7253407},{”id“:”https://openalex.org/C41008148,“wikidata”:https://www.wikidata.org/wiki/Q21198“,”display_name“:”计算机科学“,”level“:0,”score“:0.71986},{”id“:”https://openalex.org/C154945302,“wikidata”:https://www.wikidata.org/wiki/Q11660“,”display_name“:”人工智能“,”level“:1,”score“:0.6429826},{”id“:”https://openalex.org/C119857082,“wikidata”:https://www.wikidata.org/wiki/Q2539“,”display_name“:”机器学习“,”level“:1,”score“:0.5921752},{”id“:”https://openalex.org/C106189395,“wikidata”:https://www.wikidata.org/wiki/Q176789“,”display_name“:”Markov决策过程“,”level“:3,”score“:0.537632},{”id“:”https://openalex.org/C106301342,“wikidata”:https://www.wikidata.org/wiki/Q4117933“,”display_name“:”熵(时间箭头)“,”level“:2,”score“:0.5288091},{”id“:”https://openalex.org/C17098449,“wikidata”:https://www.wikidata.org/wiki/Q176814“,”“display_name”“:”“部分可观察的马尔可夫决策过程”“,”级别“:4,”分数“:0.5002301},{”id“:”https://openalex.org/C2776135515,“wikidata”:https://www.wikidata.org/wiki/Q17143721“,”display_name“:”正则化(语言学)“,”level“:2,”score“:0.4325521},{”id“:”https://openalex.org/C108650721,“wikidata”:https://www.wikidata.org/wiki/Q1783253“,”display_name“:”反事实思维“,”level“:2,”score“:0.42671186},{”id“:”https://openalex.org/C90509273,“wikidata”:https://www.wikidata.org/wiki/Q11012“,”display_name“:”Robot“,”level“:2,”score“:0.42321545},{”id“:”https://openalex.org/C9679016,“wikidata”:https://www.wikidata.org/wiki/Q1417473“,”display_name“:”最大熵原理“,”level“:2,”score“:0.41568273},{”id“:”https://openalex.org/C167966045,“wikidata”:https://www.wikidata.org/wiki/Q5532625“,”display_name“:”生成模型“,”level“:3,”score“:0.41300035},{”id“:”https://openalex.org/C159886148,“wikidata”:https://www.wikidata.org/wiki/Q176645“,”display_name“:”Markov过程“,”level“:2,”score“:0.34113997},{”id“:”https://openalex.org/C39890363,“wikidata”:https://www.wikidata.org/wiki/Q36108“,”display_name“:”生成语法“,”level“:2,”score“:0.32357335},{”id“:”https://openalex.org/C163836022,“wikidata”:https://www.wikidata.org/wiki/Q6771326“,”display_name“:”Markov模型“,”level“:3,”score“:0.23965752},{”id“:”https://openalex.org/C98763669,“wikidata”:https://www.wikidata.org/wiki/Q176645“,”display_name“:”Markov链“,”level“:2,”score“:0.23766166},{”id“:”https://openalex.org/C33923547,“wikidata”:https://www.wikidata.org/wiki/Q395“,”display_name“:”数学“,”等级“:0,”分数“:0.13595685},{”id“:”https://openalex.org/C76155785,“wikidata”:https://www.wikidata.org/wiki/Q418“,”display_name“:”Telecommunications“,”level“:1,”score“:0.0},{”id“:”https://openalex.org/C138885662,“wikidata”:https://www.wikidata.org/wiki/Q5891“,”display_name“:”哲学“,”等级“:0,”分数“:0.0},{”id“:”https://openalex.org/C105795698,“wikidata”:https://www.wikidata.org/wiki/Q12483“,”display_name“:”Statistics“,”level“:1,”score“:0.0},{”id“:”https://openalex.org/C121332964,“wikidata”:https://www.wikidata.org/wiki/Q413“,”display_name“:”物理“,”级别“:0,”分数“:0.0},{”id“:”https://openalex.org/C111472728,“wikidata”:https://www.wikidata.org/wiki/Q9471“,”display_name“:”认识论“,”level“:1,”score“:0.0},{”id“:”https://openalex.org/C62520636,“wikidata”:https://www.wikidata.org/wiki/Q944“,”display_name“:”量子力学“,”level“:1,”score“:0.0},{”id“:”https://openalex.org/C94915269,“wikidata”:https://www.wikidata.org/wiki/Q1834857网址“,”display_name“:”Detector“,”level“:2,”score“:0.0}],”mesh“:[],”locations_count“:3,”location“:[{”is_oa“:true,”landing_page_url“:”https://doi.org/10.1109/lra.202.3196139,“pdf_url”:https://ieeexplore.iee.org/ielx7/7083369/9831196/09849015.pdf,“源”:{“id”:https://openalex.org/S4210169774“,”display_name“:”IEEE机器人和自动化字母“,”issn_l“:”2377-3766“,”issn“:[”2377-3766“],”is_oa“:false,”is_in_doaj“:false,”host_organization“:”https://openalex.org/P4310318808“,”“host_organization_name”:“电气与电子工程师学会”,“host_ordanization_lineage”:[“https://openalex.org/P4310318808“],”host_organization_lineage_names“:[”电气与电子工程师协会“],“type”:“journal”},“license”:“cc-by”,“licence_id”:“https://openalex.org/licenses/cc-by“,”version“:”publishedVersion“,”is_accepted“:true,”is_published“:true},{”is_oa“:true,”landing_page_url“:”https://arxiv.org/abs/2206.10101,“pdf_url”:https://arxiv.org/pdf/2206.10101,“源”:{“id”:https://openalex.org/S4306400194“,”display_name“:”arXiv(康奈尔大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is_in_doaj“:false,”host_organization“:”https://openalex.org/I205783295“,”“host_organization_name”:“康奈尔大学”,“host_organization_lineage”:[“https://openalex.org/I205783295“],”host_organization_lineage_names“:[”Cornell University“],“type”:“repository”},“license”:null,“licence_id”:null,“version”:“submittedVersion”,“is_accepted”:false,“is_published”:false},{“is_oa”:false,“landing_page_url”:“https://api.datacite.org/dois/10.48550/arxiv.2206.10101“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S4393179698“,”display_name“:”DataCite API“,”issn_l“:null,”issn“:null,”is_oa“:true,”is_ in_doaj“:false,”host_organization“:”https://openalex.org/I4210145204“,”“host_organization_name”:“DataCite”,“host_organization_lineage”:[“https://openalex.org/I4210145204“],”host_organization_lineage_names“:[”DataCite“],“type”:“metadata”},“license”:null,“licence_id”:null,“version”:null}],“best_oa_location”:{“is_oa”:true,“landing_page_url”:“https://doi.org/10.109/lra.2022.3196139,“pdf_url”:https://ieeexplore.iee.org/ielx7/7083369/9831196/09849015.pdf,“源”:{“id”:https://openalex.org/S4210169774“,”display_name“:”IEEE robotics&automation letters“,”issn_l“:”2377-3766“,”isn“:[”2377-7766“],”is_oa“:false,”is_ in_doaj“:false,”host_organization“:”https://openalex.org/P4310318808“,”“host_organization_name”:“电气与电子工程师学会”,“host_ordanization_lineage”:[“https://openalex.org/P4310318808“],”host_organization_lineage_names“:[”电气与电子工程师协会“],“type”:“journal”},“license”:“cc-by”,“licence_id”:“https://openalex.org/licenses/cc-by“,”version“:”publishedVersion“,”is_accepted“:true,”is_published“:true},”sustainable_development_goals“:[{”score“:0.73,”id“:”https://metadata.un.org/sdg/10“,”display_name“:”减少不平等“}],”赠款“:[{”出资人“:”https://openalex.org/F4320321034“,”“funder_display_name”:“新能源和工业技术发展组织”,“award_id”:“JPNP20006”}],“数据集”:[],“版本”:[“https://openalex.org/W4292862358“],”referenced_works_count“:17,”referrenced_works“:[”https://openalex.org/W1491843047","https://openalex.org/W2068127265","https://openalex.org/W2158782408","https://openalex.org/W2751530711","https://openalex.org/W2763110165","https://openalex.org/W2894978157","https://openalex.org/W2962834855","https://openalex.org/W3000681444","https://openalex.org/W3037298378","https://openalex.org/W3094477866","https://openalex.org/W3117178794","https://openalex.org/W3130800560","https://openalex.org/W3138984732","https://openalex.org/W3143756065","https://openalex.org/W3195641273","https://openalex.org/W3196896000","https://openalex.org/W91088564“],”related_works“:[”https://openalex.org/W4384133558","https://openalex.org/W4380714744","https://openalex.org/W4293202849","https://openalex.org/W4286970243","https://openalex.org/W3201448254","https://openalex.org/W3025615835","https://openalex.org/W2066431708","https://openalex.org/W1980965563","https://openalex.org/W173210993","https://openalex.org/W1489300767“],”ngrams_url“:”https://api.openalex.org/works/W4292862358/ngrams“,”“abstract_inverted_index”:{“方法”:[0],”“基于”:[1],”基于“:[2],”生成“:[3],”对抗“:[4],”网络“:[5],”对于“:[6],”模仿“:[7],”学习“:[8,35115],”是“:[9,13],”有希望“:[10],”因为“:[11,32],”他们“:[12],”示例“:[14,45141],”高效“:[15],“in”:[16],“术语”:[17],“of”:[18,68116],“专家”:[19,89],“演示。”:[20],“然而,“:[21],“培训”:[22],“a”:[23,40,86,92134],“生成器”:[24],“需要”:[25],“许多”:[26],“交互”:[27,69],“与”:[28,70],“是”:[29,44,59,66,71,82,96102105114117120140],“实际”:[30,72106],“环境”:[31],“无模型”:[33],“加固”:[34,49],“is”:[36122],“采用”:[37],“to”:[38,64144],“update”:[39],“policy。“:[41],“To”:[42],“improve”:[43],“efficiency”:[46142],“using”:[47],“model-based”:[48,53],“learning,”:[50],“we”:【51】,“propose”:【52】,“Entropy-Regularized”:【54】,“Immution”:【55】,“learning”:【56】,“(MB-ERIL)”:【57】,“under”:【58】,“Entropy-regularizeed”:【60】,“Markov”:【61】,“decision”:[62],“过程”:[63],“减少”:[65],“数量”:[67],“环境”。“:[73],”MB-ERIL“:[74132],”使用“:[75],”两个“:[76],”鉴别器。“:[77],”A“:[78],”策略“:[79118],”鉴别器“:[80,94],”区分“:[81,95],”动作“:[83],”生成“:[84100],”由“:[88511],”机器人“:[87128],”from“:[88104],”ones“:[90],”and“:[9119126137],”model“:[993103121],”反事实“:[97],”state“:[98],”transitions“:[99],”ones“。“:[107],”We“:[108],”derive“:%109],”structured“:[110],”discriminators“:[111],”so“:+112],”that“:[11311],”efficient。“:[123],“计算机”:[124],“模拟”:[125],“真实”:[127],“实验”:[129],“显示”:[130],“实现”:[133],“竞争”:[135],“性能”:[136],“显著”:[138],“改善”:[139],“比较”:[143],“基线”:[145],“方法”。“:[146]},”cited_by_api_url“:”https://api.openalex.org/works?filter=cites:W4292862358“,”counts_by_year“:[{”年“:2023,”引用_by_count“:1}],”更新日期“:”2024-06-20T01:25:34.222129“,”创建日期“:“2022-08-24”}