{“id”:“https://openalex.org/W2271318666“,”doi“:”https://doi.org/10.1007/s10015-015-0260-7“,”title“:“基于EM的政策超参数探索:应用于两轮智能手机机器人的站立和平衡”,”display_name“:”基于EM政策超参数探究:应用于双轮智能机机器人的站立与平衡“,”publication_year“:2016,”publitation_date“:”2016-01-25“,”ids“:{”openalex“:”https://openalex.org/W2271318666“,”doi“:”https://doi.org/10.1007/s10015-015-0260-7“,”mag“:”2271318666“},”language“:”en“,”primary_location“:{”is_oa“:true,”landing_page_url“:”https://doi.org/10.1007/s10015-015-0260-7,“pdf_url”:https://link.springer.com/content/pdf/10.1007%2Fs10015-015-0260-7.pdf“,”来源“:{”id“:”https://openalex.org/S104439334“,”display_name“:”人造生命与机器人“,”issn_l“:”1433-5298“,”isn“:[”1433-52“,”1614-7456“],”is_oa“:false,”is_ in_doaj“:false,”is_core“:true,”host_organization“:”https://openalex.org/P4310318900“,”“host_organization_name”:“Springer Science+Business Media”,“host_organization_lineage”:[“https://openalex.org/P431031965","https://openalex.org/P4310318900“],”host_organization_lineage_names“:[”Springer Nature“,”Springr Science+Business Media“],“type”:“journal”},“license”:“cc-by”,“licence_id”:“https://openalex.org/licenses/cc-by“,”version“:”publishedVersion“,”is_accepted“:true,”is_published“:true},”type“:”article“,”type_crossref“:“journal-article”,”indexed_in“:[”crossref“],”open_access“:{”is_oa“:true,”oa_status“:”hybrid“,”oa_url“:”https://link.springer.com/content/pdf/10.1007%2Fs10015-015-0260-7.pdf“,”any_repository_has_fulltext“:false},”作者“:[{”作者位置“:”第一个“,”作者”:{“id”:“https://openalex.org/A5002996776“,”display_name“:”Jiexin Wang“,”orcid“:”https://orcid.org/0000-0002-3286-3711},“机构”:[{“id”:https://openalex.org/I22299242“,”display_name“:”京都大学“,”ror“:”https://ror.org/02kpeqv85“,”country_code“:”JP“,”type“:“教育”,”世系“:[”https://openalex.org/I22299242“]}],”国家“:[”JP“],”is_corresponding“:false,”raw_author_name“:”Jiexin Wang“,”raw _ afiliation_strings“:[“日本京都京都大学”],”affiliations“:[{”raw_ afiliation _string“:”京都大学,日本京都“,”institution_ids“:[https://openalex.org/I22299242“]}]},{”author_position“:”middle“,”author“:{”id“:”https://openalex.org/A5031054137“,”display_name“:”Eiji Uchibe“,”orcid“:”https://orcid.org/0000-0001-7908-0258},“机构”:[{“id”:https://openalex.org/I142637625“,”display_name“:”冲绳理工大学研究生院“,”ror“:”https://ror.org/02qg15b79“,”country_code“:”JP“,”type“:“教育”,”世系“:[”https://openalex.org/I142637625“]}],”国家“:[”JP“],”is_corresponding“:false,”raw_author_name“:”Eiji Uchibe“,”raw _ afiliation_strings“:[“冲绳科学技术研究所,日本冲绳”],”affiliations“:[{”raw _affiliation_strong“:”冲绳科学与技术研究所(日本冲绳),“institution_ids”:[“https://openalex.org/I142637625“]}]},{”author_position“:”last“,”author“:{”id“:”https://openalex.org/A5004840638“,”display_name“:”Kenji Doya“,”orcid“:”https://orcid.org/0000-0002-2446-6820},“机构”:[{“id”:https://openalex.org/I142637625“,”display_name“:”冲绳理工大学研究生院“,”ror“:”https://ror.org/02qg15b79“,”country_code“:”JP“,”type“:“教育”,”世系“:[”https://openalex.org/I142637625“]}],”国家“:[”JP“],”is_corresponding“:false,”raw_author_name“:”Kenji Doya“,”raw _ afiliation_strings“:[“冲绳科学技术研究所,日本冲绳”],”affiliations“:[{”raw _affiliation_strong“:”冲绳科学与技术研究所(日本冲绳),“institution_ids”:[“https://openalex.org/I142637625“]}]}],”institution_assertions“:[],”countries_distiction_count“:1,”institutions_disticent_count”:2,”corresponding_author_ids“:[[],”corresponding_institution_ids”:[]、“apc_list”:{“value”:2390,“currency”:“EUR”、“value_usd”:2990,“provenance”:“doaj”},“apc_payed”:{“value“:2390”,“curency”:“欧元”,“value_usd”:2990,“出处”:“doaj”},“fwci”:0.267,“has_fulltext“:true,”fulltext_origin“:”pdf“,”cited_by_count“:8,”citation_normalized_percentile“:{”value“:0.874156,”is_in_top_1_percent“:false,”is_in_top_10_percennt“:false},”citted_by_count_year“:{“min”:85,“max”:86},“biblio”:{“volume”:“21”,“issue”:“1”,“first_page”:“125”,“last_page”:”“131”},“is_retracted”:false,“is_paratext”:false,“主主题“:{”id“:”https://openalex.org/T10462“,”display_name“:”强化学习算法“,”score“:0.9995,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”Artificial Intelligence“},”field“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},”主题“:[{”id“:”https://openalex.org/T10462“,”display_name“:”强化学习算法“,”score“:0.9995,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”Artificial Intelligence“},”field“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},{”id“:”https://openalex.org/T10603“,”display_name“:”智能电网中的需求响应“,”score“:0.9872,”subfield“:{”id“:”https://openalex.org/subfields/2208“,”display_name“:”电气与电子工程“},”字段“:{”id“:”https://openalex.org/fields/22“,”display_name“:”Engineering“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},{”id“:”https://openalex.org/T1975“,”“display_name”:“遗传编程在机器学习中的应用”,“score”:0.9819,“subfield”:{“id”:“https://openalex.org/subfields/1702“,”display_name“:”Artificial Intelligence“},”field“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}}],”关键词“:[{”id“:”https://openalex.org/keywords/reinforcement-learning网站“,”display_name“:”强化学习“,”score“:0.541348},{”id“:”https://openalex.org/keywords/policy-gradient网站“,”display_name“:”策略渐变“,”score“:0.5395},{”id“:”https://openalex.org/keywords/classification-of-discontinuities网站“,”display_name“:”不连续性分类“,”score“:0.4224074}],”concepts“:[{”id“:”https://openalex.org/C41008148,“wikidata”:https://www.wikidata.org/wiki/Q21198“,”display_name“:”计算机科学“,”level“:0,”score“:0.7577121},{”id“:”https://openalex.org/C2780451532,“wikidata”:https://www.wikidata.org/wiki/Q759676“,”display_name“:”Task(project management)“,”level“:2,”score“:0.61850256},{”id“:”https://openalex.org/C97541855,“wikidata”:https://www.wikidata.org/wiki/Q830687“,”display_name“:”强化学习“,”level“:2,”score“:0.56645375},{”id“:”https://openalex.org/C90509273,“wikidata”:https://www.wikidata.org/wiki/Q10112“,”display_name“:”Robot“,”level“:2,”score“:0.5551008},{”id“:”https://openalex.org/C1929221069网址,“wikidata”:https://www.wikidata.org/wiki/Q550134“,”display_name“:”倒立摆“,”level“:3,”score“:0.52163893},{”id“:”https://openalex.org/C65655974,“wikidata”:https://www.wikidata.org/wiki/Q14867674“,”display_name“:”Swing“,”level“:2,”score“:0.51951396},{”id“:”https://openalex.org/C1960 83921,“wikidata”:https://www.wikidata.org/wiki/Q7915758“,”display_name“:”差异(会计)“,”级别“:2,”分数“:0.5015321},{”id“:”https://openalex.org/C154945302,“wikidata”:https://www.wikidata.org/wiki/Q11660“,”display_name“:”人工智能“,”level“:1,”score“:0.46301386},{”id“:”https://openalex.org/C15627037,“wikidata”:https://www.wikidata.org/wiki/Q541961“,”display_name“:”不连续性分类“,”level“:2,”score“:0.4224074},{”id“:”https://openalex.org/C83546350,“wikidata”:https://www.wikidata.org/wiki/Q1139051“,”display_name“:”回归“,”级别“:2,”分数“:0.41631895},{”id“:”https://openalex.org/C119857082,“wikidata”:https://www.wikidata.org/wiki/Q2539“,”display_name“:”机器学习“,”level“:1,”score“:0.3537144},{”id“:”https://openalex.org/C47446073,“wikidata”:https://www.wikidata.org/wiki/Q5165890“,”display_name“:”控制理论(社会学)“,”level“:3,”score“:0.33239758},{”id“:”https://openalex.org/C33923547,“wikidata”:https://www.wikidata.org/wiki/Q395“,”display_name“:”数学“,”等级“:0,”分数“:0.19020554},{”id“:”https://openalex.org/C105795698,“wikidata”:https://www.wikidata.org/wiki/Q12483“,”display_name“:”Statistics“,”level“:1,”score“:0.1471985},{”id“:”https://openalex.org/C2775924081,“wikidata”:https://www.wikidata.org/wiki/Q55608371“,”display_name“:”Control(management)“,”level“:2,”score“:0.14066157},{”id“:”https://openalex.org/C134306372,“wikidata”:https://www.wikidata.org/wiki/Q7754“,”display_name“:”数学分析“,”level“:1,”score“:0.0},{”id“:”https://openalex.org/C121332964,“wikidata”:https://www.wikidata.org/wiki/Q413“,”display_name“:”物理“,”级别“:0,”分数“:0.0},{”id“:”https://openalex.org/C144133560,“wikidata”:https://www.wikidata.org/wiki/Q4830453“,”display_name“:”Business“,”level“:0,”score“:0.0},{”id“:”https://openalex.org/C187736073,“wikidata”:https://www.wikidata.org/wiki/Q2920921“,”display_name“:”管理“,”级别“:1,”分数“:0.0},{”id“:”https://openalex.org/C121955636,“wikidata”:https://www.wikidata.org/wiki/Q4116214“,”display_name“:”Accounting“,”level“:1,”score“:0.0},{”id“:”https://openalex.org/C158622935,“wikidata”:https://www.wikidata.org/wiki/Q660848“,”display_name“:”非线性系统“,”level“:2,”score“:0.0},{”id“:”https://openalex.org/C62520636,“wikidata”:https://www.wikidata.org/wiki/Q944“,”display_name“:”量子力学“,”level“:1,”score“:0.0},{”id“:”https://openalex.org/C24890656,“wikidata”:https://www.wikidata.org/wiki/Q82811“,”display_name“:”Acoustics“,”level“:1,”score“:0.0},{”id“:”https://openalex.org/C162324750,“wikidata”:https://www.wikidata.org/wiki/Q8134“,”display_name“:”Economics“,”level“:0,”score“:0.0}],”mesh“:[],”locations_count“:1,”location“:[{”is_oa“:true,”landing_page_url“:”https://doi.org/10.1007/s10015-015-0260-7,“pdf_url”:https://link.springer.com/content/pdf/10.1007%2Fs10015-015-0260-7.pdf“,”来源“:{”id“:”https://openalex.org/S104439334“,”display_name“:”人造生命与机器人“,”issn_l“:”1433-5298“,”isn“:[”1433-52“,”1614-7456“],”is_oa“:false,”is_ in_doaj“:false,”is_core“:true,”host_organization“:”https://openalex.org/P4310318900“,”“host_organization_name”:“Springer Science+Business Media”,“host_organization_lineage”:[“https://openalex.org/P431031965","https://openalex.org/P4310318900“],”host_organization_lineage_names“:[”Springer Nature“,”Springr Science+Business Media“],“type”:“journal”},“license”:“cc-by”,“licence_id”:“https://openalex.org/licenses/cc-by“,”version“:”publishedVersion“,”is_accepted“:true,”is_published“:true}],”best_oa_location“:{”is_oa“:true,”landing_page_url“:”https://doi.org/10.1007/s10015-015-0260-7,“pdf_url”:https://link.springer.com/content/pdf/10.1007%2Fs10015-015-0260-7.pdf“,”来源“:{”id“:”https://openalex.org/S104439334“,”display_name“:”人造生命与机器人“,”issn_l“:”1433-5298“,”isn“:[”1433-52“,”1614-7456“],”is_oa“:false,”is_ in_doaj“:false,”is_core“:true,”host_organization“:”https://openalex.org/P4310318900“,”“host_organization_name”:“Springer Science+Business Media”,“host_organization_lineage”:[“https://openalex.org/P431031965","https://openalex.org/P4310318900“],”host_organization_lineage_names“:[”Springer Nature“,”Springr Science+Business Media“],“type”:“journal”},“license”:“cc-by”,“licence_id”:“https://openalex.org/licenses/cc-by“,”version“:”publishedVersion“,”is_accepted“:true,”is_published“:true},”sustainable_development_goals“:[],”grants“:[],”datasets“:],”versions“:[https://openalex.org/W1925816294","https://openalex.org/W1971492381","https://openalex.org/W1989144314","https://openalex.org/W2012587148","https://openalex.org/W2109169869","https://openalex.org/W2112036188","https://openalex.org/W2113501460","https://openalex.org/W2119717200","https://openalex.org/W2127107099","https://openalex.org/W2137104525","https://openalex.org/W2146737184“],”related_works“:[”https://openalex.org/W34871393","https://openalex.org/W3168108534","https://openalex.org/W2798244654","https://openalex.org/W2689391174","https://openalex.org/W2614575562","https://openalex.org/W2529137940","https://openalex.org/W2372645633","https://openalex.org/W2360051520网址","https://openalex.org/W1972096828","https://openalex.org/W1486689224“],”abstract_inverted_index“:{”This“:[0],”paper“:[1],”proposes“:[2],”a“:[3,35,47108127],”novel“:[4],”policy“:[5,37,43,53,65],”search“:⑹,”algorithm“:%7],”call“:[8],”EM-based“:[9,28,61],”policy“:[10,21],”Hyper“:[11],”Parameter“:[12,24],”Exploration“:%13,25],”(EPHE)“:[14],”其中“:[15],”集成“:[16],”两个“:[17],”加强“:[18],”学习“:[19,81120122],”算法:“:[20],”梯度“:[22],”with“:[23,41129],”(PGPE)“:[26],”and“:[27,57,77101105],”Reward-Weighted“:[29,62],”Regression。“:[30],”Like“:[31],”PGPE“:[32],”EPHE“:[33116],”evaluations“:[34],”determinative“:[36],”in“:[38,91],”each“:[39],”seption“:[40],”the“:[42,52,64,80,92],”parameters“:[44,55,67],”sampled“:[45],”from“:[46],”previor“:[48],”distribution“:[49],”givent“:[50],”by“:[51,92]70],“超”:[54,66],“(平均值”:[56],“方差”)。“:[58],”基于“:[59],”on“:[60],”回归“:[63],”are“:[68,83],”updated“:[69],”reward-weighted“:[71],”averaging“:[72],”so“:%73],”that“:[74115],”gradient“:[75],”calculation“:[76],”tuning“:[78124],”of“:[79,94103107],”rate“:[82123],”not“,”required“。“:[85],“The”:[86],“proposed”:[87],“method”:[88],“is”:[89],“tested”:[90],“benchmarks”:[93],“sween”:[95],“swing-up”:[96],“task”,“:[97],“cart-pole”:[98],“balancing”:[99106],“task”:[100128],“simulation”:[102],“stating”:[104],“tweel”:[109],“智能手机”:[110],“robot”。“:[111],”实验“:[112],”结果“:[113],”显示“:[114],”可以“:[117],”实现“:[118],”高效“:[119],”没有“:[121],”偶数“:[125],”表示“:[126],”不连续。“:[130]},”cited_by_api_url“:”https://api.openalex.org/works?filter=cites:W2271318666“,”counts_by_year“:[{“年”:2024,”cited_by_count“:2},{“年份”:2023,”ciped_by_cunt“:1},”{“年度”:2022,“cited_by_count”:2}.,{”年“:2021,”cited_by_count 1“,”创建日期“:”2016-06-24“}