{“id”:“https://openalex.org/W4317928369“,”doi“:”https://doi.org/10.1007/978-3-031-24383-7_22“,”title“:“通过动态近似策略优化高效学习对话策略”,”display_name“:”通过动态近似政策优化有效学习对话策略“,”publication_year“:2022,”publitation_date“:”2022-01-01“,”ids“:{”openalex“:”https://openalex.org/W4317928369“,”doi“:”https://doi.org/10.1007/978-3-031-24383-7_22“},”language“:”en“,”primary_location“:{”is_oa“:false,”landing_page_url“:”https://doi.org/10.1007/978-3-031-24383-7_22“,”pdf_url“:null,”source“:null,”license“:null',”licence_id“:null,”version“:nuller,”is_accepted“:false,”is_published“:false},”type“:”book-chapter“,”type_crossref“:“book-chaapter”,”indexed_in“:[”crossref“],”open_access“:{”is_oa“:false,”oa_status“:”closed“,”oa_url”:null“,”any_repository_has_fulltext“:假}”发货“:[{”author_position“:”first“,”作者“:{”id“:”https://openalex.org/A5101193908“,”display_name“:”陈平黄“,”orcid“:null},”机构“:[{”id“:”https://openalex.org/I55712492“,”display_name“:”浙江工业大学“,”ror“:”https://ror.org/02djqfd08“,”country_code“:”CN“,”type“:“教育”,”世系“:[”https://openalex.org/I55712492“]}],”countries“:[”CN“],”is_corresponding“:false,”raw_author_name“:”Chenping Huang“,”raw _ afiliation_strings“:【”浙江工业大学计算机科学与技术学院,中国杭州“】,”affiliations“:[{”raw _affiliation_strong“:”浙江理工大学计算机科学技术学院,杭州“,”机构ID“:[”https://openalex.org/I55712492“]}]},{”author_position“:”last“,”author“:{”id“:”https://openalex.org/A5101755033“,”display_name“:”Bin Cao“,”orcid“:”https://orcid.org/0000-0003-1062-6309},“机构”:[{“id”:https://openalex.org/I55712492“,”display_name“:”浙江工业大学“,”ror“:”https://ror.org/02djqfd08“,”country_code“:”CN“,”type“:“教育”,”世系“:[”https://openalex.org/I55712492“]}],”国家“:[”CN“],”is_corresponding“:false,”raw_author_name“:”曹斌“,”raw_affiation_strings“:[”浙江工业大学计算机科学与技术学院,中国杭州“],”附属机构“:[{”raw_affiation_string“:”浙江工业大学计算机科学与技术学院,中国杭州“,”机构ID“:[”https://openalex.org/I55712492“]}]}],”institution_assertions“:[],”countries_distinact_count“:1,”institutions_disticant_count”:1,“corresponding_author_ids”:[]、”correspounding_institution_ids“:[],”apc_list“:null,”apc _payed“:nul,”fwci“:0.0,”has_fulltext“:false,”cited_by_count:0,”citation_normalized_percentile“:{”value“:0.05,”is_in_top_1_percent“:false,“is_in_top_10_percent”:错误},“cited_by_percentile_year“:{“min”:0,“max”:61},“biblio”:{”volume“:null,”issue“:nul,”first_page“:”396“,”last_page“:”414“},”is_retracted“:false,”is_paratext“:false,”primary_topic“:”{“id”:“https://openalex.org/T12031“,”“display_name”:“口语系统的对话行为建模”,“score”:0.9999,“subfield”:{“id”:“https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},”主题“:[{”id“:”https://openalex.org/T12031“,”“display_name”:“口语系统的对话行为建模”,“score”:0.9999,“subfield”:{“id”:“https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},{”id“:”https://openalex.org/T10028“,”display_name“:”自然语言处理“,”score“:0.9993,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},{”id“:”https://openalex.org/T12128“,”display_name“:”服务业人工智能“,”score“:0.9937,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}}],”关键词“:[{”id“:”https://openalex.org/keywords/dialog-management网站“,”display_name“:”对话框管理“,”score“:0.578158},{”id“:”https://openalex.org/keywords/ticket“,”display_name“:”Ticket“,”score“:0.5729106},{”id“:”https://openalex.org/keywords/spoken-dialogue-systems网站“,”display_name“:”口语对话系统“,”score“:0.568417},{”id“:”https://openalex.org/keywords/reinforcement-learning网站“,”display_name“:”强化学习“,”score“:0.520417},{”id“:”https://openalex.org/keywords/topic-modeling网站“,”display_name“:”主题建模“,”score“:0.500235},{”id“:”https://openalex.org/keywords/policy-learning网站“,”display_name“:”策略学习“,”score“:0.44681433}],”concepts“:[{”id“:”https://openalex.org/C97541855,“wikidata”:https://www.wikidata.org/wiki/Q830687“,”display_name“:”强化学习“,”level“:2,”score“:0.7407625},{”id“:”https://openalex.org/C2780451532,“wikidata”:https://www.wikidata.org/wiki/Q759676“,”display_name“:”任务(项目管理)“,”级别“:2,”分数“:0.7401199},{”id“:”https://openalex.org/C41008148,“wikidata”:https://www.wikidata.org/wiki/Q21198“,”display_name“:”计算机科学“,”level“:0,”score“:0.7363741},{”id“:”https://openalex.org/C2776540713,“wikidata”:https://www.wikidata.org/wiki/Q7800647“,”display_name“:”Ticket“,”level“:2,”score“:0.5729106},{”id“:”https://openalex.org/C154945302,“wikidata”:https://www.wikidata.org/wiki/Q11660“,”display_name“:”人工智能“,”level“:1,”score“:0.51237285},{”id“:”https://openalex.org/C2779436431,“wikidata”:https://www.wikidata.org/wiki/Q30672407“,”display_name“:”策略学习“,”level“:2,”score“:0.44681433},{”id“:”https://openalex.org/C108583219,“wikidata”:https://www.wikidata.org/wiki/Q197536“,”display_name“:”深度学习“,”level“:2,”score“:0.42881355},{”id“:”https://openalex.org/C119857082,“wikidata”:https://www.wikidata.org/wiki/Q2539(网址:https://www.wikidata.org/wiki/Q2539)“,”display_name“:”机器学习“,”level“:1,”score“:0.36259753},{”id“:”https://openalex.org/C38652104,“wikidata”:https://www.wikidata.org/wiki/Q3510521“,”display_name“:”计算机安全“,”level“:1,”score“:0.14732957},{”id“:”https://openalex.org/C127413603,“wikidata”:https://www.wikidata.org/wiki/Q11023“,”display_name“:”Engineering“,”level“:0,”score“:0.11943221},{”id“:”https://openalex.org/C201995342,“wikidata”:https://www.wikidata.org/wiki/Q682496“,”display_name“:”系统工程“,”level“:1,”score“:0.0}],”mesh“:[],”locations_count“:1.”locations“:[{”is_oa“:false,”landing_page_url“:”https://doi.org/10.1007/978-3-031-24383-7_22“,”pdf_url“:null,”source“:null,”license“:null:”license_id“:null,”version“:number,”is_accepted“:false,”is_published“:false}],”best_oa_location“:nul,”sustainable_development_goals“:[],”grants“:[],”datasets“:],”versions“:[]:”,“referenced_works_count”:29,“referrenced_works”:[”https://openalex.org/W1491843047","https://openalex.org/W1948566616","https://openalex.org/W206217565","https://openalex.org/W2145339207","https://openalex.org/W2604382266","https://openalex.org/W2798494119","https://openalex.org/W2889186204","https://openalex.org/W2949476504","https://openalex.org/W2951805158","https://openalex.org/W2962682659","https://openalex.org/W2963043030","https://openalex.org/W2963433587","https://openalex.org/W2963857397","https://openalex.org/W2971159908","https://openalex.org/W2979372603","https://openalex.org/W3013860672","https://openalex.org/W3034330559","https://openalex.org/W3034782127","https://openalex.org/W3035597485","https://openalex.org/W3037879762","https://openalex.org/W3102854726","https://openalex.org/W3163045655","https://openalex.org/W3174076858","https://openalex.org/W317509535351","https://openalex.org/W3177075735","https://openalex.org/W3200895474","https://openalex.org/W3212099586","https://openalex.org/W3214586773","https://openalex.org/W4290742115“],”related_works“:[”https://openalex.org/W4375841483","https://openalex.org/W4288388931","https://openalex.org/W4206805925","https://openalex.org/W3112526189网址","https://openalex.org/W2892636954","https://openalex.org/W2572886659","https://openalex.org/W2364431604","https://openalex.org/W2022874741","https://openalex.org/W2018860124","https://openalex.org/W132856376“],”abstract_inverted_index“:{”Many“:[0],”methods“:[1,76],”have“:[2158],”been“:[3],”proposed“:[4],”to“:[5,9,38,56,69,89145148],”use“:[6,63],”reinforction“:[7],”learning“:[8],”train“:[10,70],”dialogue“:[11,15,67,72100113],”policy“:[12],”for“:[13154],”task-oriented“:[14],“系统”:[16],“in”:[17],“最近”:[18],“年份”:[19],“然而”:[20],““:[21,31,43,58,64,71,90104109112121130137151162174183],“高”:[22],“成本”:[23185],“的”:[24,3311164176],“交互”:[25],“与”:[26136],“用户”:[27],“有”:[28190],“严重”:[29],“受阻”:[30],“开发”:[32],“此”:[34,40],“字段。“:[35],”In“:[36],”order“:[37],”reduce“:[39],”interaction“:[41184],”cost“,:[42],”Deep“:44],”Dyna-Q“:[45],”(DDQ)“:[46],”algorithm“:47135172],”and“:[48,61140178189],”several“:[49],”variants“:[50],”input“:[51],”a“:[52142191],”所谓的“:[53],”world“:[54142191]105138152],“模型”:[55106139153],“模拟”:[57],“用户\u2019s”:[59],“响应“:[60],”然后“:[62],”生成“:[65102],”模拟“:[66],”数据“:[68101],”策略。“:[73114],”尽管如此,“:[74],”这些“:[75117],”遭受“:[77],”来自“:[78],”两个“:[79],”主要“:[80],”问题。“:[81],”“The”:[82,94],“first”:[83],“is”:[84,96],“limited”:[85],“training”:[86188],“efficiency”:[87],“due”:[88],“Deep-Q”:[91],“Network”:[92],“used”。“:[93],”second“:[95],”that“:[97170],”low quality“:[98],”simulation“:[99],”by“:[103],”may“:[107],”hurt“:[108],”performance“:[110],”To“:[115],”solve“:[116],”defectives“,”:[118],“we”:[119],“propose”:[120],”Dyna“:[122],”Proximal“:%123131],”Policy“:124132],”Optimization“:[125133],“(DPPO)”:[126],“算法。“:[127],”DPPO“:[128],”组合“:[129173],”(PPO)“:[134],”使用“:[141],”停用“:[143],”策略“:[144],”决定“:[146],”何时“:[147],”停止“:[149],”正在使用“:%150],”后续“:[155],”培训。“:[156],”我们“:[157],”进行“:[159],”实验“:[160],”关于“:[161],”任务“:[163193],”电影“:[165],”票“:[166],”预订。“:[167],“实验”:[168],“显示”:[169],“我们的”:[171],“优势”:[175],“DDQ”:[177],“PPO,”:[179],“其中”:[180],“显著”:[181],“减少”:[182],“必需”:[186],“期间”:[187],“较高”:[192],“成功”:[194],“速率”。“:[195]},”cited_by_api_url“:”https://api.openalex.org/works?filter=cites:W4317928369“,”counts_by_year“:[],”updated_date“:”2024-09-16T07:05:01.026441“,”created_date:“2023-01-25”}“