{“id”:“https://openalex.org/W4388677802“,”doi“:”https://doi.org/10.1007/978-3-031-45170-6_19“,”title“:”带约束恢复的反向强化学习“,”display_name“:”带有约束恢复的逆向强化学习“、”publication_year“:2023,”publication_date“:”2023-01-01“,”ids“:{”openalex“:”https://openalex.org/W4388677802“,”doi“:”https://doi.org/10.1007/978-3-031-45170-6_19“},”language“:”en“,”primary_location“:{”is_oa“:false,”landing_page_url“:”https://doi.org/10.1007/978-3-031-45170-6_19“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S106296714“,”“display_name”“:”“计算机科学课堂讲稿”“,”issn_l“:”0302-9743“,”isn“:[”0302-7743“、”1611-3349“],”is_oa“:false,”is_in_doaj“:false,”host_organization“:”https://openalex.org/P4310318900“,”“host_organization_name”:“Springer Science+Business Media”,“host_organization_lineage”:[“https://openalex.org/P431031965","https://openalex.org/P4310318900“],”host_organization_lineage_names“:[”Springer Nature“,”Springer-Science+Business Media“],“type”:“book-series”},“license”:null,“licence_id”:nul,“version”:null,“is_accepted”:false,“is_published”:false},”type“:”book-chapter“,”type_crossref“:“book-chapter”,”indexed_in“:[“crossref”],”open_access“:{”is_oa“:false”“,”oa_url“:空,”any_repository_has_fulltext“:false},”作者身份“:[{”作者位置“:”第一个“,”作者“:{”id“:”https://openalex.org/A5065341673“,”display_name“:”N.C.Das“,”orcid“:null},”institutions“:[{”id“:”https://openalex.org/I68891433“,”display_name“:”德里印度理工学院“,”ror“:”https://ror.org/049tgcd06“,”country_code“:”IN“,”type“:“教育”,”世系“:[”https://openalex.org/I68891433“]}],”国家“:[”IN“],”is_corresponding“:false,”raw_author_name“:”Nirjhar Das“,”raw _affiliation_strings“:[“印度新德里印度理工学院”]},{“author_position”:“last”,“author”:{“id”:“https://openalex.org/A5086971942“,”display_name“:”Arpan Chattopadhyay“,”orcid“:null},”institutions“:[{”id“:”https://openalex.org/I68891433“,”display_name“:”德里印度理工学院“,”ror“:”https://ror.org/049tgcd06“,”country_code“:”IN“,”type“:“教育”,”世系“:[”https://openalex.org/I68891433“]}],”国家“:[”IN“],”is_corresponding“:false,”raw_author_name“:”Arpan Chattopadhyay“,”raw _ afiliation_strings“:[“印度新德里印度理工学院”]}]“countries _ distinct_count”:1,”institutions _ disting_count“:1,“corresponding_author_ids”:[],”correspounding_institution_ids“:[]”,“apc_list”:{“value”:5000,“currency”:“EUR”,“”value_usd“:5392,”出处“:”doaj“},”apc_payed“:{”value“:5000,”currency“:”EUR“,”value_usd:5392,“出处”:“doaj”},“has_fulltext”:true,“fulltext_origin”:“pdf”,“cited_by_count”:0,“cited_by_percentile_year”:{“min”:0、“max”:78},《图书》:{卷:null,“issue”:null、“first_page”:“”179“,”last_page“:”188“},”is_retracted“:false,”is_paratext“:false,”主主题“:{”id“:”https://openalex.org/T10462“,”display_name“:”强化学习算法“,”score“:1.0,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},”主题“:[{”id“:”https://openalex.org/T10462“,”display_name“:”强化学习算法“,”score“:1.0,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},{”id“:”https://openalex.org/T12794“,”“display_name”:“最优控制的自适应动态规划”,“score”:0.9948,“subfield”:{“id”:“https://openalex.org/subfields/1703“,”display_name“:”计算理论与数学“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},{”id“:”https://openalex.org/T10653“,”“display_name”:“机器人抓取和示范学习”,“score”:0.9921,“subfield”:{“id”:“https://openalex.org/subfields/2207“,”display_name“:”控制与系统工程“},”字段“:{”id“:”https://openalex.org/fields/22“,”display_name“:”Engineering“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}}],”关键词“:[{”id“:”https://openalex.org/keywords/reinforcement-learning网站“,”display_name“:”强化学习“,”score“:0.602299},{”id“:”https://openalex.org/keywords/robot-learning“,”display_name“:”机器人学习“,”score“:0.549165},{”id“:”https://openalex.org/keywords/adaptive-dynamic编程“,”display_name“:”自适应动态编程“,”score“:0.530418}],”concepts“:[{”id“:”https://openalex.org/C106189395,“wikidata”:https://www.wikidata.org/wiki/Q176789“,”display_name“:”Markov决策过程“,”level“:3,”score“:0.6350149},{”id“:”https://openalex.org/C126255220,“wikidata”:https://www.wikidata.org/wiki/Q141495“,”display_name“:”数学优化“,”level“:1,”score“:0.6312163},{”id“:”https://openalex.org/C41008148,“wikidata”:https://www.wikidata.org/wiki/Q21198(网址:https://www.wikidata.org/wiki/Q21198)“,”display_name“:”计算机科学“,”level“:0,”score“:0.626025},{”id“:”https://openalex.org/C97541855,“wikidata”:https://www.wikidata.org/wiki/Q830687“,”display_name“:”强化学习“,”level“:2,”score“:0.54180306},{”id“:”https://openalex.org/C2776036281,“wikidata”:https://www.wikidata.org/wiki/Q48769818“,”display_name“:”约束(计算机辅助设计)“,”level“:2,”score“:0.491081},{”id“:”https://openalex.org/C112680207,“wikidata”:https://www.wikidata.org/wiki/Q714886网址“,”display_name“:”正多边形“,”level“:2,”score“:0.48165524},{”id“:”https://openalex.org/C187691185,“wikidata”:https://www.wikidata.org/wiki/Q2020720“,”display_name“:”Grid“,”level“:2,”score“:0.46695527},{”id“:”https://openalex.org/C137836250,“wikidata”:https://www.wikidata.org/wiki/Q984063“,”“display_name”“:”优化问题“,”级别“:2,”分数“:0.4553067},{”id“:”https://openalex.org/C157972887,“wikidata”:https://www.wikidata.org/wiki/Q463359“,”display_name“:”凸优化“,”level“:3,”score“:0.45475402},{”id“:”https://openalex.org/C145446738,“wikidata”:https://www.wikidata.org/wiki/Q319913“,”display_name“:”凸函数“,”level“:3,”score“:0.44606107},{”id“:”https://openalex.org/C9679016,“wikidata”:https://www.wikidata.org/wiki/Q1417473“,”display_name“:”最大熵原理“,”level“:2,”score“:0.42162716},{”id“:”https://openalex.org/C154945302,“wikidata”:https://www.wikidata.org/wiki/Q11660“,”display_name“:”人工智能“,”level“:1,”score“:0.27678597},{”id“:”https://openalex.org/C33923547,“wikidata”:https://www.wikidata.org/wiki/Q395网址“,”display_name“:”数学“,”等级“:0,”分数“:0.23473385},{”id“:”https://openalex.org/C159886148,“wikidata”:https://www.wikidata.org/wiki/Q176645“,”display_name“:”Markov过程“,”level“:2,”score“:0.2295123},{”id“:”https://openalex.org/C105795698,“wikidata”:https://www.wikidata.org/wiki/Q12483“,”display_name“:”Statistics“,”level“:1,”score“:0.0},{”id“:”https://openalex.org/C2524010,“wikidata”:https://www.wikidata.org/wiki/Q8087“,”display_name“:”Geometry“,”level“:1,”score“:0.0}],”mesh“:[],”locations_count“:1.”locations“:[{”is_oa“:false,”landing_page_url“:”https://doi.org/10.1007/978-3-031-45170-6_19“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S106296714“,”“display_name”“:”“计算机科学课堂讲稿”“,”issn_l“:”0302-9743“,”isn“:[”0302-7743“、”1611-3349“],”is_oa“:false,”is_in_doaj“:false,”host_organization“:”https://openalex.org/P4310318900“,”“host_organization_name”:“Springer Science+Business Media”,“host_organization_lineage”:[“https://openalex.org/P431031965","https://openalex.org/P4310318900“],”host_organization_lineage_names“:[”Springer Nature“,”Springer Science+Business Media“],”type“:”系列丛书“},”license“:null,”license_id“:null,”version“:null,”is_accepted“:false,”is_published“:false}],”best_oa_location“:null,”可持续发展目标“:[{”score“:0.49,”id“:”https://metadata.un.org/sdg/16“,”display_name“:”和平、正义和强大的机构“}],”grants“:[],”datasets“:[],”versions“:[】,”referenced_works_count“:9,”refernced_works“:[”https://openalex.org/W1518931405","https://openalex.org/W1986014385","https://openalex.org/W1999874108","https://openalex.org/W2107726111","https://openalex.org/W2169498096","https://openalex.org/W2737702598","https://openalex.org/W3023096123","https://openalex.org/W4205841652","https://openalex.org/W4214717370“],”related_works“:[”https://openalex.org/W4284974072","https://openalex.org/W4225269853","https://openalex.org/W3096874164","https://openalex.org/W2937181779","https://openalex.org/W2386410636","https://openalex.org/W2357975469","https://openalex.org/W2341346307","https://openalex.org/W2145363145","https://openalex.org/W1985560493","https://openalex.org/W1626977535“],”ngrams_url“:”https://api.openalex.org/works/W4388677802/ngrams网站“,”“abstract_inverted_index”:{“In”:[0,19,47],“this”:[1,48],“work”,:[2,49],“we”:[3,50,72114],“propose”:[4],“a”:[5,38,86],“novel”:【6】,“inverse”:【7,24],“reinforcement”:【8】,“learning”:【9】,“(IRL)”:【10】,“algorithma”:【11109120】,“for”:【12,43121】,“constrained”:【13,87,97】,“马尔可夫”:[14],“决策”:[15],“过程”:[16],“(CMDP)”:[17],“问题。”:[18],“标准“:[20],”IRL“:[21,76],”问题“:[22],”函数“:[23,31,35,44,56,60,64,67,75116122],”学习者“:[25],”或“:[26],”代理“:[27],”寻求“:[28],”到“:[29,52,94110],”恢复“:[30],”奖励“:[32,57],”函数”:[33],“of”:[34,40,59,69118],”MDP“:[36],”给定“:[37],“设置”:[39],“轨迹”:[41],“演示”:[42],“最佳”:[45],“策略”。“:[46],”查找“:[51],”推断“:[53],”非“:[54],”仅“:[55],”函数“:[58],”CMDP,“:[61],”但“:[62],”还“:[63],”约束。“:[65],”使用“:[66],”原则“:[68],”最大值“:[70],”熵“:[71],”显示“:[73],”that“:[74],”with“:[77],”constraint“:[78],”recovery“:[79],”(IRL-CR)“:[80],”problem“:[81,99],”can“:[82],”be“:[83],”cast“:[84],”as“:[85],”non-vex“:[88],”optimization“:[89],98],“问题。“:[90],”We“:[91104],”reduce“:[92],”it“:[93],”an“:[95],”alternative“:/96],”which“:[100],”sub-problems“:[101],”are“:[102],”凸面。“:[103],”使用“:[105],”指数“:[106],”梯度“:[107],”下降“:[108],”求解“:[111],”它。“:[112],”最后,“:[113],”演示“:[115],”功效“:[117],”我们的“:[119],”网格“:[123],”世界“:[124],”环境。“:[125]},”cited_by_api_url“:”https://api.openalex.org/works?filter=cites:W4388677802“,”counts_by_year“:[],”updated_date“:”2024-05-23T02:44:28.716537“,”created_date:“2023-11-15”}“