{“id”:“https://openalex.org/W3088674748“,”doi“:”https://doi.org/10.3233/ia-180011“,”title“:”On the use of the policy gradient and Hessian in inverse reinforcement learning“,”display_name“:”On the use of policy gradient and Hessian in逆强化学习“,”publication_year“:2020,”publitation_date“:”2020-09-17“,”ids“:{”openalex“:”https://openalex.org/W3088674748“,”doi“:”https://doi.org/10.3233/ia-180011“,”mag“:”3088674748“},”language“:”en“,”primary_location“:{”is_oa“:false,”landing_page_url“:”https://doi.org/10.3233/ia-180011“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S207922018“,”display_name“:”Intelligenza Artificiale/Intelligenza Artificiale“,”issn_l“:”1724-8035“,”isn“:[”1724-8335“,“2211-0097”],”is_oa“:false,”is_ in_doaj“:false,”is_core“:true,”host_organization“:”https://openalex.org/P4310318577“,”“host_organization_name”:“IOS新闻”,“host_organization_lineage”:[“https://openalex.org/P4310318577“],”host_organization_lineage_names“:[”IOS Press“],”type“:”journal“},”license“:null,”license_id“:null,”version“:null,”is_accepted“:false,”is_published“:false},”type“:”article“,”type_crossref“:”journal article“,”indexed_in“:[”crossref“],”open_access“:{”is_oa“:false,”oa_status“:”closed“,”oa_url“:null,”any_repository_has_fulltext“:false},“authorships“:[{”author_position“:”first“,”author“:{”id“:”https://openalex.org/A5037963323“,”display_name“:”Alberto Maria Metelli“,”orcid“:”https://orcid.org/0000-0002-3424-5212“},”机构“:[{”id“:”https://openalex.org/I93860229“,”display_name“:”米兰理工大学“,”ror“:”https://ror.org/01nffqt88“,”country_code“:”IT“,”type“:“教育”,”世系“:[”https://openalex.org/I93860229“]}],”国家“:[”IT“],”is_corresponding“:true,”raw_author_name“:”Alberto Maria Metelli“,”raw _ affiliation_strings“:[“DEIB,Politecnico di Milano,Milan,Italy”],”affiliation“:[{”raw _affiliation_string“:”DEIBhttps://openalex.org/I93860229“]}]},{”author_position“:”middle“,”author“:{”id“:”https://openalex.org/A5091526684“,”display_name“:”Matteo Pirotta“,”orcid“:null},”institutions“:[{”id“:”https://openalex.org/I4210138412“,”display_name“:”Inria Lille-北欧中心“,”ror“:”https://ror.org/04eej9726“,”“country_code”“:”FR“,”type“:”facility“,”lineage“:[”https://openalex.org/I1326498283","https://openalex.org/I4210138412“]}],”国家“:[”FR“],”is_corresponding“:false,”raw_author_name“:”Matteo Pirotta“,”raw _ afiliation_strings“:[“SequeL,Inria Lille\u2013北欧,Villeneuv d\u2019Ascq,法国”],”affiliations“:[{”raw _affiliation_string“:”SequeL、Inria Rille\u2013North Europe,Villenneu2019ascq,France“,”institution_ids“:[https://openalex.org/I4210138412“]}]},{”author_position“:”last“,”author“:{”id“:”https://openalex.org/A5017130830“,”display_name“:”Marcello Restelli“,”orcid“:”https://orcid.org/0000-0002-6322-1076“},”机构“:[{”id“:”https://openalex.org/I93860229“,”display_name“:”米兰理工大学“,”ror“:”https://ror.org/01nffqt88“,”country_code“:”IT“,”type“:“教育”,”世系“:[”https://openalex.org/I93860229“]}],”国家“:[”IT“],”is_corresponding“:false,”raw_author_name“:”Marcello Restelli“,”raw _affiliation_strings“:[“DEIB,Politecnico di Milano,Milan,Italy”],”affiliations“:[{”raw _affiliation_string“:”DEIBhttps://openalex.org/I93860229“]}]}],”countries_disticont_count“:2,”institutions_disticent_count”:2,“corresponding_author_ids”:[“https://openalex.org/A5037963323“],”对应的机构ID“:[”https://openalex.org/I93860229“],”apc_list“:null,”apc _ paid“:nul,”fwci“:0.156,”has_fulltext“:false,”cited_by_count“:1,”cited_by_percentile_year“:{”min“:64,”max“:72},”biblio“:{”volume“:”14“,”issue“:”1“,”first_page“:”117“,”last_page“:“150”},“is_retracted”:false“,”is_paratext“:false,”primary_topic“:{'”id“:”https://openalex.org/T10462“,”display_name“:”强化学习算法“,”score“:0.9997,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},”主题“:[{”id“:”https://openalex.org/T10462“,”display_name“:”强化学习算法“,”score“:0.9997,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},{”id“:”https://openalex.org/T10653“,”“display_name”:“机器人抓取和示范学习”,“score”:0.9875,“subfield”:{“id”:“https://openalex.org/subfields/2207“,”display_name“:”控制与系统工程“},”字段“:{”id“:”https://openalex.org/fields/22“,”display_name“:”Engineering“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},{”id“:”https://openalex.org/T10848“,”“display_name”:“进化算法中的多目标优化”,“score”:0.9782,“subfield”:{“id”:“https://openalex.org/subfields/1703“,”display_name“:”计算理论与数学“},”field“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}}],”关键词“:[{”id“:”https://openalex.org/keywords/reinforcement-learning网站“,”display_name“:”强化学习“,”score“:0.63447},{”id“:”https://openalex.org/keywords/robot-learning“,”display_name“:”Robot Learning“,”score“:0.543474}],”concepts“:[{”id“:”https://openalex.org/C97541855,“wikidata”:https://www.wikidata.org/wiki/Q830687“,”display_name“:”强化学习“,”level“:2,”score“:0.8357308},{”id“:”https://openalex.org/C41008148,“wikidata”:https://www.wikidata.org/wiki/Q21198“,”display_name“:”计算机科学“,”level“:0,”score“:0.72417915},{”id“:”https://openalex.org/C14036430网址,“wikidata”:https://www.wikidata.org/wiki/Q3736076“,”display_name“:”功能(生物学)“,”级别“:2,”分数“:0.7199854},{”id“:”https://openalex.org/C203616005,“wikidata”:https://www.wikidata.org/wiki/Q620495“,”display_name“:”Hessian矩阵“,”level“:2,”score“:0.6464127},{”id“:”https://openalex.org/C177264268,“wikidata”:https://www.wikidata.org/wiki/Q1514741“,”display_name“:”Set(abstract data type)“,”level“:2,”score“:0.5185262},{”id“:”https://openalex.org/C154945302,“wikidata”:https://www.wikidata.org/wiki/Q11660“,”display_name“:”人工智能“,”level“:1,”score“:0.50993717},{”id“:”https://openalex.org/C2778572836,“wikidata”:https://www.wikidata.org/wiki/Q380933“,”display_name“:”空格(标点符号)“,”level“:2,”score“:0.4964226},{”id“:”https://openalex.org/C119857082,“wikidata”:https://www.wikidata.org/wiki/Q2539“,”display_name“:”机器学习“,”level“:1,”score“:0.42154413},{”id“:”https://openalex.org/C140779682,“wikidata”:https://www.wikidata.org/wiki/Q210868网址“,”display_name“:”采样(信号处理)“,”level“:3,”score“:0.41487524},{”id“:”https://openalex.org/C126255220,“wikidata”:https://www.wikidata.org/wiki/Q141495“,”display_name“:”数学优化“,”level“:1,”score“:0.36070213},{”id“:”https://openalex.org/C33923547,“wikidata”:https://www.wikidata.org/wiki/Q395“,”display_name“:”Mathematics“,”level“:0,”score“:0.2023305},{”id“:”https://openalex.org/C28826006,“wikidata”:https://www.wikidata.org/wiki/Q33521“,”display_name“:”应用数学“,”level“:1,”score“:0.07978636},{”id“:”https://openalex.org/C78458016,“wikidata”:https://www.wikidata.org/wiki/Q840400“,”display_name“:”进化生物学“,”level“:1,”score“:0.0},{”id“:”https://openalex.org/C86803240,“wikidata”:https://www.wikidata.org/wiki/Q420“,”display_name“:”生物学“,”等级“:0,”分数“:0.0},{”id“:”https://openalex.org/C199360897,“wikidata”:https://www.wikidata.org/wiki/Q9143“,”display_name“:”编程语言“,”level“:1,”score“:0.0},{”id“:”https://openalex.org/C111919701,“wikidata”:https://www.wikidata.org/wiki/Q9135“,”display_name“:”操作系统“,”level“:1,”score“:0.0},{”id“:”https://openalex.org/C106131492,“wikidata”:https://www.wikidata.org/wiki/Q3072260“,”display_name“:”筛选器(信号处理)“,”level“:2,”score“:0.0},{”id“:”https://openalex.org/C31972630,“wikidata”:https://www.wikidata.org/wiki/Q844240“,”display_name“:”计算机视觉“,”level“:1,”score“:0.0}],”mesh“:[],”locations_count“:1.”locations“:[{”is_oa“:false,”landing_page_url“:”https://doi.org/10.3233/ia-180011“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S207922018“,”display_name“:”Intelligenza Artificiale/Intelligenza Artificiale“,”issn_l“:”1724-8035“,”isn“:[”1724-8335“,“2211-0097”],”is_oa“:false,”is_ in_doaj“:false,”is_core“:true,”host_organization“:”https://openalex.org/P4310318577“,”host_organization_name“:”IOS Press“,”host_organization_lineage“:[”https://openalex.org/P4310318577“],”host_organization_lineage_names“:[”IOS Press“],“type”:“journal”},“license”:null,“licence_id”:null,“version”:nul,“is_accepted”:false,“is_published”:false}],“best_oa_location”:null,“sustainable_development_goals”:[{“display_name”:“和平、正义和强大的制度”,“id”:”https://metadata.un.org/sdg/16“,”score“:0.76}],”grants“:[],”datasets“:[],”versions“:[】,”referenced_works_count“:18,”referrenced_works“:【”https://openalex.org/W1777239053","https://openalex.org/W1975463331","https://openalex.org/W1986014385","https://openalex.org/W2037064199","https://openalex.org/W2056132907","https://openalex.org/W2076337359","https://openalex.org/W2093524643","https://openalex.org/W2119717200","https://openalex.org/W2121517924","https://openalex.org/W2125612430","https://openalex.org/W2144787788","https://openalex.org/W2271263738","https://openalex.org/W2535247013","https://openalex.org/W2751530711","https://openalex.org/W3005862512","https://openalex.org/W3006656721","https://openalex.org/W3027095131","https://openalex.org/W4241521318“],”related_works“:[”https://openalex.org/W4283017538","https://openalex.org/W3021699548","https://openalex.org/W2802707792","https://openalex.org/W2611031068","https://openalex.org/W2569979269","https://openalex.org/W2075777916","https://openalex.org/W2015677538","https://openalex.org/W1996936972","https://openalex.org/W1704347466","https://openalex.org/W1545275724“],”ngrams_url“:”https://api.openalex.org/works/W3088674748/ngrams网站“,”“abstract_inverted_index”:{“强化”:[0,52],“学习”:[1,53],“(RL)”:[2],“是”:[3,16,37,88,99153],“an”:[4136142],“有效”:[5],“方法”:[6105163],“到”:[7,22,42,73,80110116132170],“解决”:[8],“顺序”:[9],“决策”:[10],“制定”:[11],“问题”:[12],“当”:[13],“:[14,24,62,67,78119125128139146184199],”环境“:[15,79],“装备”:[17],“有”:[18198],“a”:[19,34,57,92101112151157],“奖励”:[20,35,58,83121147152185201],“功能”:[21,36,59113186],“评估”:[23,81],“代理\u2019s”:[25],“操作”。“:[26],”“然而,”“:[27],”there“:[28],”are“:[29,49],”several“:[30],”domains“:[31],”in“:[32,71,86149203],”which“:[33150],”not“:[38108],”available“:[39],”and“:[40179],”hard“:[41],”estimate“。“:[43],“When”:[44],“samples”:[45],“of”:[46,66,94205],“expert”:[47],“agents”:[48],“available”,“:[50],“Inverse”:[51],“(IRL)”:[54],“allow”:[55191],“recovering”:[56],“that”:[60106127183194],“example”:[61],“demoved”:[63],“behavior”。“:[64],”大多数“:[65],”经典“:[68],”IRL“:[69104],”方法“:[70],”加法“:[72],”专家\u2019s“:[74120],”演示“:[75],”要求“:[76109],”采样“:[77],”每个“:[82],”函数“:[84148202],”那个“:[85],”转弯“:[87],”建造“:[89],”开始“:[90],”从“:[91],“设置”:[93],“工程化”:[95],“功能。“:[96],“This”:[97],“paper”:[98],“about”:[100],“novel”:[102],“model-free”:[103],“does”:[107],“specify”:[111],“space”:[114144],“where”:+115],“search”:117],“for”:[1185145164],“function”。“:[122],”杠杆“:[123],”on“:[124176],”fact“:[126],”policy“:[129],”gradient“:[130],”needs“:[131],”be“:[133],”zero“:[134],”optimal“:[137],”policy“:%138],”algorithm“:%140190],”generates“:[141],”approximation“:[143],”singled“:[154],”out“:[155],”using“:[156],”二阶“:[158],”criteria●●●●。“:[159],”之后“:[160],”引入“:[161],”我们的“:[162189],”有限“:[165178],”域,“:[166181],”we“:[167],”扩展“:[168],”it“:[169],”连续“:[171180],”ones。“:[172],”The“:[173],”Experimental“:[174],”results“,“results”:[175],”both“:[177],”show“:[182],”recovered“:[187],”by“:[188],”learning“:[192206],”policies“:[193],”exceller“:[195],”these“:[196],”getated“:[197],”true“:[200],”terms“:[204],”speed“。“:[207]},”引用_by_api_url“:”https://api.openalex.org/works?filter=cites:W3088674748“,”counts_by_year“:[{”年“:2022,”cited_by_count“:1}],”更新日期“:”2024-07-21T13:48:54.670162“,”创建日期“:“2020-10-01”}