{"id":"https://openalex.org/W4391305545","doi":"https://doi.org/10.1109/taslp.2024.3357038","title":"Decomposed Deep Q-Network for Coherent Task-Oriented Dialogue Policy Learning","display_name":"Decomposed Deep Q-Network for Coherent Task-Oriented Dialogue Policy Learning","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4391305545","doi":"https://doi.org/10.1109/taslp.2024.3357038"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2024.3357038","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/taslp.2024.3357038","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://dspace.library.uu.nl/handle/1874/435964","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103015387","display_name":"Yangyang Zhao","orcid":"https://orcid.org/0000-0003-1890-6508"},"institutions":[{"id":"https://openalex.org/I56934997","display_name":"Changsha University of Science and Technology","ror":"https://ror.org/03yph8055","country_code":"CN","type":"education","lineage":["https://openalex.org/I56934997"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yangyang Zhao","raw_affiliation_strings":["Department of Computer and Communication Engineering, Changsha University of Science and Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer and Communication Engineering, Changsha University of Science and Technology, Changsha, China","institution_ids":["https://openalex.org/I56934997"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100625569","display_name":"Kai Yin","orcid":"https://orcid.org/0000-0002-7391-2815"},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kai Yin","raw_affiliation_strings":["Department of Software, South China University of Technology, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"Department of Software, South China University of Technology, Guangzhou, China","institution_ids":["https://openalex.org/I90610280"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100344975","display_name":"Zhenyu Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhenyu Wang","raw_affiliation_strings":["Department of Software, South China University of Technology, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"Department of Software, South China University of Technology, Guangzhou, China","institution_ids":["https://openalex.org/I90610280"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028760842","display_name":"Mehdi Dastani","orcid":"https://orcid.org/0000-0002-4641-4087"},"institutions":[{"id":"https://openalex.org/I193662353","display_name":"Utrecht University","ror":"https://ror.org/04pp8hn57","country_code":"NL","type":"education","lineage":["https://openalex.org/I193662353"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Mehdi Dastani","raw_affiliation_strings":["Department of Information and Computing Sciences, Utrecht University, Utrecht, The Netherlands"],"affiliations":[{"raw_affiliation_string":"Department of Information and Computing Sciences, Utrecht University, Utrecht, The Netherlands","institution_ids":["https://openalex.org/I193662353"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5107808323","display_name":"Shihan Wang","orcid":"https://orcid.org/0000-0001-5971-7522"},"institutions":[{"id":"https://openalex.org/I193662353","display_name":"Utrecht University","ror":"https://ror.org/04pp8hn57","country_code":"NL","type":"education","lineage":["https://openalex.org/I193662353"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Shihan Wang","raw_affiliation_strings":["Department of Information and Computing Sciences, Utrecht University, Utrecht, The Netherlands"],"affiliations":[{"raw_affiliation_string":"Department of Information and Computing Sciences, Utrecht University, Utrecht, The Netherlands","institution_ids":["https://openalex.org/I193662353"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5103015387"],"corresponding_institution_ids":["https://openalex.org/I56934997"],"apc_list":null,"apc_paid":null,"fwci":0.6879,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.72139645,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":"32","issue":null,"first_page":"1380","last_page":"1391"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9936000108718872,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7544829249382019},{"id":"https://openalex.org/keywords/generality","display_name":"Generality","score":0.7135873436927795},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7074159383773804},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6081737279891968},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.6033554673194885},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.5694993734359741},{"id":"https://openalex.org/keywords/value","display_name":"Value (mathematics)","score":0.4984908103942871},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4798252582550049},{"id":"https://openalex.org/keywords/flexibility","display_name":"Flexibility (engineering)","score":0.4618028998374939},{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.4572571814060211},{"id":"https://openalex.org/keywords/value-network","display_name":"Value network","score":0.4480821490287781},{"id":"https://openalex.org/keywords/bellman-equation","display_name":"Bellman equation","score":0.4282628297805786},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3227168917655945},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.1483754813671112},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1386561095714569}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7544829249382019},{"id":"https://openalex.org/C2780767217","wikidata":"https://www.wikidata.org/wiki/Q5532421","display_name":"Generality","level":2,"score":0.7135873436927795},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7074159383773804},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6081737279891968},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.6033554673194885},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.5694993734359741},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.4984908103942871},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4798252582550049},{"id":"https://openalex.org/C2780598303","wikidata":"https://www.wikidata.org/wiki/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.4618028998374939},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.4572571814060211},{"id":"https://openalex.org/C89249532","wikidata":"https://www.wikidata.org/wiki/Q7912758","display_name":"Value network","level":3,"score":0.4480821490287781},{"id":"https://openalex.org/C14646407","wikidata":"https://www.wikidata.org/wiki/Q1430750","display_name":"Bellman equation","level":2,"score":0.4282628297805786},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3227168917655945},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.1483754813671112},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1386561095714569},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C162853370","wikidata":"https://www.wikidata.org/wiki/Q39809","display_name":"Marketing","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C4216890","wikidata":"https://www.wikidata.org/wiki/Q815823","display_name":"Business model","level":2,"score":0.0},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C542102704","wikidata":"https://www.wikidata.org/wiki/Q183257","display_name":"Psychotherapist","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/taslp.2024.3357038","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/taslp.2024.3357038","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},{"id":"pmh:oai:dspace.library.uu.nl:1874/435964","is_oa":true,"landing_page_url":"https://dspace.library.uu.nl/handle/1874/435964","pdf_url":null,"source":{"id":"https://openalex.org/S4306401649","display_name":"Utrecht University Repository (Utrecht University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I193662353","host_organization_name":"Utrecht University","host_organization_lineage":["https://openalex.org/I193662353"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"}],"best_oa_location":{"id":"pmh:oai:dspace.library.uu.nl:1874/435964","is_oa":true,"landing_page_url":"https://dspace.library.uu.nl/handle/1874/435964","pdf_url":null,"source":{"id":"https://openalex.org/S4306401649","display_name":"Utrecht University Repository (Utrecht University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I193662353","host_organization_name":"Utrecht University","host_organization_lineage":["https://openalex.org/I193662353"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":50,"referenced_works":["https://openalex.org/W1585861384","https://openalex.org/W1592847719","https://openalex.org/W1975244201","https://openalex.org/W2071302132","https://openalex.org/W2107726111","https://openalex.org/W2109910161","https://openalex.org/W2121517924","https://openalex.org/W2153385324","https://openalex.org/W2532116096","https://openalex.org/W2571927164","https://openalex.org/W2740191615","https://openalex.org/W2884814595","https://openalex.org/W2889186204","https://openalex.org/W2946824041","https://openalex.org/W2949476504","https://openalex.org/W2951805158","https://openalex.org/W2956161617","https://openalex.org/W2962686687","https://openalex.org/W2963140401","https://openalex.org/W2963306198","https://openalex.org/W2963433587","https://openalex.org/W2964080167","https://openalex.org/W2964180249","https://openalex.org/W2965908679","https://openalex.org/W2971159908","https://openalex.org/W2979372603","https://openalex.org/W2981793129","https://openalex.org/W2998458199","https://openalex.org/W3034782127","https://openalex.org/W3046799624","https://openalex.org/W3099140719","https://openalex.org/W3104546989","https://openalex.org/W3184662702","https://openalex.org/W3190743547","https://openalex.org/W3214586773","https://openalex.org/W4206917480","https://openalex.org/W6634579893","https://openalex.org/W6634945376","https://openalex.org/W6636252571","https://openalex.org/W6677116005","https://openalex.org/W6683821272","https://openalex.org/W6685444567","https://openalex.org/W6703271639","https://openalex.org/W6731619401","https://openalex.org/W6734215269","https://openalex.org/W6734696739","https://openalex.org/W6750702700","https://openalex.org/W6753556266","https://openalex.org/W6758714175","https://openalex.org/W6776546938"],"related_works":["https://openalex.org/W2045049461","https://openalex.org/W1978893398","https://openalex.org/W2201908702","https://openalex.org/W4381094582","https://openalex.org/W2369625323","https://openalex.org/W2386410636","https://openalex.org/W3038962357","https://openalex.org/W2025663273","https://openalex.org/W3099153698","https://openalex.org/W3201942433"],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1],"(RL)":[2],"has":[3,20],"emerged":[4],"as":[5],"a":[6,23,38,92,121],"key":[7],"technique":[8],"for":[9,30,77,85],"designing":[10],"dialogue":[11,18,31,50,61,177,215],"policies.":[12,32],"However,":[13],"action":[14,150],"space":[15],"inflation":[16],"in":[17,147,166,193],"tasks":[19],"led":[21],"to":[22,52,212],"heavy":[24],"decision":[25,154],"burden":[26],"and":[27,59,82,127,174,186,196],"incoherence":[28,163],"problems":[29],"In":[33],"this":[34,184],"paper,":[35],"we":[36],"propose":[37],"novel":[39],"decomposed":[40],"deep":[41],"Q-network":[42],"(D2Q)":[43],"that":[44,159],"exploits":[45],"the":[46,68,78,83,86,101,105,109,115,125,128,134,139,148,153,160,172,202],"natural":[47],"structure":[48,119],"of":[49,65,72,104,136,162,176,204],"actions":[51,146],"perform":[53],"decomposition":[54],"on":[55,114,130],"Q-function,":[56,69],"realizing":[57],"efficient":[58],"coherent":[60],"policy":[62,178,216],"learning.":[63,179],"Instead":[64],"directly":[66],"evaluating":[67],"it":[70],"consists":[71],"two":[73],"separate":[74],"estimators,":[75],"one":[76],"abstract":[79,97,140],"action-value":[80,88,98,111,141],"functions":[81],"other":[84,213],"specific":[87,110,145],"functions,":[89],"both":[90,194],"sharing":[91],"common":[93],"feature":[94],"layer.":[95],"The":[96],"function":[99,112,142],"determines":[100],"speech":[102,131],"act":[103],"system":[106,129],"action,":[107],"while":[108],"focuses":[113],"concrete":[116],"action.":[117],"This":[118],"establishes":[120],"logical":[122],"relationship":[123],"between":[124],"user":[126],"actions,":[132],"avoiding":[133],"problem":[135,161,185],"incoherence.":[137],"Moreover,":[138],"shields":[143],"unreasonable":[144],"inflated":[149],"space,":[151],"reducing":[152],"complexity.":[155],"Our":[156,180],"results":[157],"show":[158],"is":[164],"prevalent":[165],"existing":[167],"approaches,":[168],"which":[169],"significantly":[170,188],"impacts":[171],"efficiency":[173],"quality":[175],"D2Q":[181],"architecture":[182],"alleviates":[183],"performs":[187],"better":[189],"than":[190],"competitive":[191],"baselines":[192],"evaluated":[195],"human":[197],"experiments.":[198],"Further":[199],"experiments":[200],"validate":[201],"generality":[203],"our":[205],"method.":[206],"It":[207],"can":[208],"be":[209],"easily":[210],"extended":[211],"RL-based":[214],"approaches.":[217]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
