{“id”:“https://openalex.org/W1983264093“,”doi“:”https://doi.org/10.109/devlrn.2013.6652533“,”title“:”依赖于州的折扣因子的强化学习“,”display_name“:”基于州的折扣因数的强化学习”,“publication_year”:2013,“publiation_date”:“2013-08-01”,“ids”:{“openalex”:“https://openalex.org/W1983264093网址“,”doi“:”https://doi.org/10.109/devlrn.2013.6652533“,”mag“:”1983264093“},”language“:”en“,”primary_location“:{”is_oa“:false,”landing_page_url“:”https://doi.org/10.109/devlrn.2013.6652533“,”pdf_url“:null,”source“:null,”license“:null',”licence_id“:null,”version“:nuller,”is_accepted“:false,”is_published“:false},”type“:”article“,”type_crossref“:“procesdings-article”,”indexed_in“:[”crossref“],”open_access“:{”is_oa“:false,”oa_status“:”closed“,”oa_url”:null“,”any_repository_has_fulltext“:false}”,”authorships“:[{”author_position“:”first“,”作者“:{”id“:”https://openalex.org/A5024114769“,”display_name“:”Naoto Yoshida“,”orcid“:”https://orcid.org/0000-0003-0394-2450},“机构”:[{“id”:https://openalex.org/I75917431“,”display_name“:”奈良科技学院“,”ror“:”网址:https://ror.org/05bhada84“,”country_code“:”JP“,”type“:“教育”,”世系“:[”https://openalex.org/I75917431“]}],”国家“:[”JP“],”is_corresponding“:false,”raw_author_name“:”吉田直人“,”raw _ afiliation_strings“:[“日本奈良奈良科学技术研究院(NAIST)”],”affiliations“:[{”raw _affiliation_strong“:”日本奈良科技研究院(NA IST)“,”institution_ids“:]”https://openalex.org/I75917431“]}]},{”author_position“:”middle“,”author“:{”id“:”https://openalex.org/A5031054137“,”display_name“:”Eiji Uchibe“,”orcid“:”https://orcid.org/0000-0001-7908-0258},“机构”:[{“id”:https://openalex.org/I142637625“,”display_name“:”冲绳理工大学研究生院“,”ror“:”https://ror.org/02qg15b79“,”country_code“:”JP“,”type“:“教育”,”世系“:[”https://openalex.org/I142637625“]}],”国家“:[”JP“],”is_corresponding“:false,”raw_author_name“:”Eiji Uchibe“,”raw _ afiliation_strings“:[“冲绳科学技术研究所(OIST),冲绳,日本”],”affiliations“:[{”raw _affiliation_strong“:”冲绳科学与技术研究所,日本”,“institution_ids”:[“https://openalex.org/I142637625“]}]},{”author_position“:”last“,”author“:{”id“:”https://openalex.org/A5004840638“,”display_name“:”Kenji Doya“,”orcid“:”https://orcid.org/0000-0002-2446-6820},“机构”:[{“id”:https://openalex.org/I75917431“,”display_name“:”奈良科技学院“,”ror“:”https://ror.org/05bhada84“,”country_code“:”JP“,”type“:“教育”,”世系“:[”https://openalex.org/I75917431“]}],”国家“:[”JP“],”is_corresponding“:false,”raw_author_name“:”Kenji Doya“,”raw _ afiliation_strings“:[“日本奈良市奈良科学技术研究院(NAIST)”],”affiliations“:[{”raw _affiliation_strong“:”奈良市科学技术研究所(NAISD),日本奈良“,”institution_ids“:[https://openalex.org/I75917431“]}]}],”countries_destict_count“:1,”institutions_disict_count“:2,”corresponding_author_ids“:[],”corresponding_institution_ids“:[],”apc_list“:null,”apc_payed“:null,”has_fulltext“:true,”fulltext_origin“:”ngrams“,”cited_by_count“:13,”cited_by_percentle_year“:{”min“:88,”max“:89},”biblio“:{”volume“:null,”issue“:null,”first_page““:null,”last_page“:null},”is_retracted“:false,”is_paratext“:fase,”primary_topic“:{”id“:”https://openalex.org/T10462“,”display_name“:”强化学习算法“,”score“:0.9844,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”Physical Sciences“}},”topics“:[{”id“:”https://openalex.org/T10462“,”display_name“:”强化学习算法“,”score“:0.9844,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},{”id“:”https://openalex.org/T10328“,”display_name“:”供应链中的协调与信息共享“,”score“:0.981,”subfield“:{”id“:”https://openalex.org/subfields/s404“,”display_name“:”管理信息系统“},”字段“:{”id“:”https://openalex.org/fields/14“,”display_name“:”商业、管理和会计“},”域“:{”id“:”https://openalex.org/domains/2“,”display_name“:”社会科学“}}],”关键词“:[{”id“:”https://openalex.org/keywords/reinforcement-learning网站“,”display_name“:”强化学习“,”score“:0.613436},{”id“:”https://openalex.org/keywords/dynamic-pricing“,”display_name“:”动态定价“,”score“:0.50908}],”concepts“:[{”id“:”https://openalex.org/C97541855,“wikidata”:https://www.wikidata.org/wiki/Q830687“,”display_name“:”强化学习“,”level“:2,”score“:0.8881383},{”id“:”https://openalex.org/C41008148,“wikidata”:https://www.wikidata.org/wiki/Q21198“,”display_name“:”计算机科学“,”level“:0,”score“:0.6433529},{”id“:”https://openalex.org/C14036430网址,“wikidata”:https://www.wikidata.org/wiki/Q3736076“,”display_name“:”功能(生物学)“,”级别“:2,”分数“:0.5458085},{”id“:”https://openalex.org/C14646407,“wikidata”:https://www.wikidata.org/wiki/Q1430750“,”display_name“:”Bellman equation“,”level“:2,”score“:0.5253222},{”id“:”https://openalex.org/C6177178,“wikidata”:https://www.wikidata.org/wiki/Q10998070“,”display_name“:”折扣“,”级别“:2,”分数“:0.52450097},{”id“:”https://openalex.org/C2781039887,“wikidata”:https://www.wikidata.org/wiki/Q1391724“,”display_name“:”Factor(programming language)“,”level“:2,”score“:0.5172487},{”id“:”https://openalex.org/C2777027219,“wikidata”:https://www.wikidata.org/wiki/Q1284190“,”display_name“:”Constant(计算机编程)“,”level“:2,”score“:0.49900484},{”id“:”https://openalex.org/C154945302,“wikidata”:https://www.wikidata.org/wiki/Q11660“,”display_name“:”人工智能“,”level“:1,”score“:0.48523068},{”id“:”https://openalex.org/C2776401178,“wikidata”:https://www.wikidata.org/wiki/Q12050496“,”display_name“:”功能(语言学)“,”级别“:2,”分数“:0.46893266},{”id“:”https://openalex.org/C126255220,“wikidata”:https://www.wikidata.org/wiki/Q141495“,”display_name“:”数学优化“,”level“:1,”score“:0.4640061},{”id“:”https://openalex.org/C2776291640,“wikidata”:https://www.wikidata.org/wiki/Q2912517“,”display_name“:”Value(mathematics)“,”level“:2,”score“:0.43115658},{”id“:”https://openalex.org/C48103436,“wikidata”:https://www.wikidata.org/wiki/Q599031“,”display_name“:”State(computer science)“,”level“:2,”score“:0.41487175},{”id“:”https://openalex.org/C119857082,“wikidata”:https://www.wikidata.org/wiki/Q2539“,”display_name“:”机器学习“,”level“:1,”score“:0.35321903},{”id“:”https://openalex.org/C11413529,“wikidata”:https://www.wikidata.org/wiki/Q8366“,”display_name“:”Algorithm“,”level“:1,”score“:0.3136969},{”id“:”https://openalex.org/C33923547,“wikidata”:https://www.wikidata.org/wiki/Q395“,”display_name“:”数学“,”等级“:0,”分数“:0.2725293},{”id“:”https://openalex.org/C10138342,“wikidata”:https://www.wikidata.org/wiki/Q43015“,”display_name“:”Finance“,”level“:1,”score“:0.0665915},{”id“:”https://openalex.org/C41895202,“wikidata”:https://www.wikidata.org/wiki/Q8162“,”display_name“:”语言学“,”level“:1,”score“:0.0},{”id“:”https://openalex.org/C138885662,“wikidata”:https://www.wikidata.org/wiki/Q5891“,”display_name“:”哲学“,”等级“:0,”分数“:0.0},{”id“:”https://openalex.org/C78458016,“wikidata”:https://www.wikidata.org/wiki/Q840400“,”display_name“:”进化生物学“,”level“:1,”score“:0.0},{”id“:”https://openalex.org/C86803240,“wikidata”:https://www.wikidata.org/wiki/Q420“,”display_name“:”生物学“,”等级“:0,”分数“:0.0},{”id“:”https://openalex.org/C162324750,“wikidata”:https://www.wikidata.org/wiki/Q8134“,”display_name“:”经济学“,”level“:0,”score“:0.0},{”id“:”https://openalex.org/C199360897,“wikidata”:https://www.wikidata.org/wiki/Q9143“,”display_name“:”编程语言“,”level“:1,”score“:0.0}],”mesh“:[],”locations_count“:1.”locations“:[{”is_oa“:false,”landing_page_url“:”https://doi.org/10.109/devlrn.2013.6652533“,”pdf_url“:null,”source“:null,”license“:null:”license_id“:null,”version“:nuller,”is_accepted“:false,”is_published“:false}],”best_oa_location“:nul,”sustainable_development_goals“:[],”grants“:[],”datasets“:],”versions“:[]:”,“referenced_works_count”:19,“referrenced_works”:[”https://openalex.org/W1500331901","https://openalex.org/W1777239053","https://openalex.org/W1978481500","https://openalex.org/W1996579288","https://openalex.org/W2055647224","https://openalex.org/W2101915445","https://openalex.org/W2117726420","https://openalex.org/W2119333483","https://openalex.org/W2158596240","https://openalex.org/W2161563886","https://openalex.org/W2164424353","https://openalex.org/W2165131254","https://openalex.org/W2168024904","https://openalex.org/W2328851770","https://openalex.org/W2336687883","https://openalex.org/W2951516712","https://openalex.org/W3011120880","https://openalex.org/W32403112","https://openalex.org/W4214717370“],”related_works“:[”https://openalex.org/W4319083788","https://openalex.org/W4241291308","https://openalex.org/W3188220908","https://openalex.org/W3174239553","https://openalex.org/W3103643887","https://openalex.org/W2978320887","https://openalex.org/W2950892788","https://openalex.org/W2194966727","https://openalex.org/W1588825565","https://openalex.org/W1567215325“],”ngrams_url“:”https://api.openalex.org/works/W1983264093/ngrams“,”“abstract_inverted_index”:{“常规”:[0],“强化”:[1,62],“学习”:[2,12,63],“算法”:[3],“有”:[4],“多个”:[5],“参数”:[6],“其中”:[7141],“确定”:[8],“特征”:[9,22,27,31,51,61,72,80,84,89102106118130],“的”:[11,30,91],“过程”:[13],“调用”:[14],“元参数。”:[15],“In”:[16114],“this”:[17],“研究,“:[18],”我们“:[19,49,77121],”关注“:[20],”关于“:[21],”折扣“:[23,39,53103107138149],”因素“:[24,40104],”那“:[25,79129145],”影响“:[26],”时间“:[28],“规模”:[29],”权衡“:[32],”介于“:[33],”立即“:[34],”和“:[35,76105127],”延迟“:[36],”奖励。“:[37],”The“:[38],”is“:[41],”通常“:[42],”consided“:[43],”as“:[44],”a“:[45,56,68,98123147],”常数“:[46148],”value“,”:[47],”but“:[48],”Introduct“:[50],”state dependent“:[52137],”function“:[54,871081399],”new“:[57,69,92],”optimization“:[58],”criteria“:[59,93],”for“:[60],“算法。“:[64113],“We”:[65,95],“first”:[66],“derive”:/67],“algorithm”:%70,81132],“under”:[71],“criteria”,“:[73],“named”:[74],“ExQ-learning”:[75],“prove”:[78],“converges”:[82],“to”:[83100116],“optimal”:+85],“action-value”:[06],“in”:[88],“意为”:[90],“w.p.1”:[94],“then”:[96],“present”:[97],“framework”:[99],“optimize”:[101],“by”:[109],“using”:[110]an“:[111135],”进化“:[112],”顺序“:[115],”验证“:[117],”建议“:[119131],”方法“:[120],”行为“:[122],”简单“:[124],”计算机“:[125],”模拟“:[126],”显示“:[128],”可以“:[133],”查找“:[134],”适当“:[136],”with“:[140146],”执行“:[142],”更好“:[143],”比“:[144],”因子●●●●。“:[150]},”cited_by_api_url“:”https://api.openalex.org/works?filter=cites:W1983264093“,”“counts_by_year”:[{“年份”:2023,”“cited_by_count”:5},{“年度”:2022,”“cited_by_cunt”:2},}“年份“:2021,”updated_date“:”2024-06-18T09:53:29.278251“,”创建日期“:”2016-06-24“}