{"id":"https://openalex.org/W2963433587","doi":"https://doi.org/10.1609/aaai.v33i01.33017289","title":"Switch-Based Active Deep Dyna-Q: Efficient Adaptive Planning for Task-Completion Dialogue Policy Learning","display_name":"Switch-Based Active Deep Dyna-Q: Efficient Adaptive Planning for Task-Completion Dialogue Policy Learning","publication_year":2019,"publication_date":"2019-07-17","ids":{"openalex":"https://openalex.org/W2963433587","doi":"https://doi.org/10.1609/aaai.v33i01.33017289","mag":"2963433587"},"language":"en","primary_location":{"id":"doi:10.1609/aaai.v33i01.33017289","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v33i01.33017289","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/4715/4593","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/4715/4593","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5082763858","display_name":"Yuexin Wu","orcid":"https://orcid.org/0000-0001-9005-5678"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Yuexin Wu","raw_affiliation_strings":["Carnegie Mellon University"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021140826","display_name":"Xiujun Li","orcid":"https://orcid.org/0000-0001-7771-2725"},"institutions":[{"id":"https://openalex.org/I4210164937","display_name":"Microsoft Research (United Kingdom)","ror":"https://ror.org/05k87vq12","country_code":"GB","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210164937"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Xiujun Li","raw_affiliation_strings":["Microsoft Research"],"affiliations":[{"raw_affiliation_string":"Microsoft Research","institution_ids":["https://openalex.org/I4210164937"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100442599","display_name":"Jingjing Liu","orcid":"https://orcid.org/0009-0006-8629-4151"},"institutions":[{"id":"https://openalex.org/I4210164937","display_name":"Microsoft Research (United Kingdom)","ror":"https://ror.org/05k87vq12","country_code":"GB","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210164937"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Jingjing Liu","raw_affiliation_strings":["Microsoft"],"affiliations":[{"raw_affiliation_string":"Microsoft","institution_ids":["https://openalex.org/I4210164937"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114910293","display_name":"Jianfeng Gao","orcid":"https://orcid.org/0000-0002-5702-6143"},"institutions":[{"id":"https://openalex.org/I4210164937","display_name":"Microsoft Research (United Kingdom)","ror":"https://ror.org/05k87vq12","country_code":"GB","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210164937"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Jianfeng Gao","raw_affiliation_strings":["Microsoft Research"],"affiliations":[{"raw_affiliation_string":"Microsoft Research","institution_ids":["https://openalex.org/I4210164937"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5106542734","display_name":"Yiming Yang","orcid":"https://orcid.org/0009-0006-3569-0023"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yiming Yang","raw_affiliation_strings":["Carnegie Mellon University"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University","institution_ids":["https://openalex.org/I74973139"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5082763858"],"corresponding_institution_ids":["https://openalex.org/I74973139"],"apc_list":null,"apc_paid":null,"fwci":4.3892,"has_fulltext":true,"cited_by_count":45,"citation_normalized_percentile":{"value":0.94749282,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":"33","issue":"01","first_page":"7289","last_page":"7296"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8300271034240723},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7372775077819824},{"id":"https://openalex.org/keywords/q-learning","display_name":"Q-learning","score":0.7231999039649963},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.7191301584243774},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5069699287414551},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.4797845482826233},{"id":"https://openalex.org/keywords/active-learning","display_name":"Active learning (machine learning)","score":0.46488142013549805},{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.4473339319229126},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.4304999113082886},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.35710322856903076},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.10688179731369019}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8300271034240723},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7372775077819824},{"id":"https://openalex.org/C188116033","wikidata":"https://www.wikidata.org/wiki/Q2664563","display_name":"Q-learning","level":3,"score":0.7231999039649963},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.7191301584243774},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5069699287414551},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.4797845482826233},{"id":"https://openalex.org/C77967617","wikidata":"https://www.wikidata.org/wiki/Q4677561","display_name":"Active learning (machine learning)","level":2,"score":0.46488142013549805},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.4473339319229126},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.4304999113082886},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.35710322856903076},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.10688179731369019},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v33i01.33017289","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v33i01.33017289","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/4715/4593","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v33i01.33017289","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v33i01.33017289","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/4715/4593","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2581707695","display_name":"BIGDATA: F: Large-Scale Transductive Learning from Heterogeneous Data Sources","funder_award_id":"1546329","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G5420615920","display_name":null,"funder_award_id":"IIS-1546329","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G848032724","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320308943","display_name":"Microsoft Research","ror":"https://ror.org/00d0nc645"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2963433587.pdf","grobid_xml":"https://content.openalex.org/works/W2963433587.grobid-xml"},"referenced_works_count":27,"referenced_works":["https://openalex.org/W1491843047","https://openalex.org/W1665214252","https://openalex.org/W1731081199","https://openalex.org/W1948566616","https://openalex.org/W1975244201","https://openalex.org/W2062175565","https://openalex.org/W2109038907","https://openalex.org/W2117989772","https://openalex.org/W2145339207","https://openalex.org/W2257979135","https://openalex.org/W2295072214","https://openalex.org/W2417401578","https://openalex.org/W2473329891","https://openalex.org/W2571927164","https://openalex.org/W2798494119","https://openalex.org/W2889186204","https://openalex.org/W2949252816","https://openalex.org/W2952013107","https://openalex.org/W2962996309","https://openalex.org/W2963007936","https://openalex.org/W2963068985","https://openalex.org/W2964101860","https://openalex.org/W4306716473","https://openalex.org/W6637618735","https://openalex.org/W6666761814","https://openalex.org/W6676205556","https://openalex.org/W7075680496"],"related_works":["https://openalex.org/W3096874164","https://openalex.org/W2166117066","https://openalex.org/W4376605461","https://openalex.org/W2357975469","https://openalex.org/W2136202932","https://openalex.org/W3087814763","https://openalex.org/W2892507673","https://openalex.org/W2361647908","https://openalex.org/W2937181779","https://openalex.org/W2537866915"],"abstract_inverted_index":{"Training":[0],"task-completion":[1],"dialogue":[2],"agents":[3],"with":[4],"reinforcement":[5],"learning":[6,104],"usually":[7],"requires":[8],"a":[9,23,82,90],"large":[10],"number":[11],"of":[12,43,50,60,102],"real":[13,61,91],"user":[14],"experiences.":[15],"The":[16,41],"Dyna-Q":[17,77,147],"algorithm":[18],"extends":[19],"Q-learning":[20,156],"by":[21,37,80,109,133],"integrating":[22,81],"world":[24,39,52,112],"model,":[25],"and":[26,136,155,161],"thus":[27],"can":[28],"effectively":[29],"boost":[30],"training":[31],"efficiency":[32],"using":[33],"simulated":[34,63,93,116],"experiences":[35,64,117],"generated":[36],"the":[38,48,51,57,73,100,111,119,123,139],"model.":[40],"effectiveness":[42],"Dyna-Q,":[44],"however,":[45],"depends":[46],"on":[47],"quality":[49],"model":[53,113],"-":[54],"or":[55,92],"implicitly,":[56],"pre-specified":[58],"ratio":[59],"vs.":[62],"used":[65],"for":[66,95,105],"Q-learning.":[67,96],"To":[68],"this":[69],"end,":[70],"we":[71,98],"extend":[72],"recently":[74],"proposed":[75],"Deep":[76,146],"(DDQ)":[78],"framework":[79,141],"switcher":[83,135],"that":[84,132],"automatically":[85],"determines":[86],"whether":[87],"to":[88,114,150],"use":[89,101],"experience":[94],"Furthermore,":[97],"explore":[99],"active":[103,137],"improving":[106],"sample":[107],"efficiency,":[108],"encouraging":[110],"generate":[115],"in":[118,158],"stateaction":[120],"space":[121],"where":[122],"agent":[124],"has":[125],"not":[126],"(fully)":[127],"explored.":[128],"Our":[129],"results":[130],"show":[131],"combining":[134],"learning,":[138],"new":[140],"named":[142],"as":[143],"Switch-based":[144],"Active":[145],"(Switch-DDQ),":[148],"leads":[149],"significant":[151],"improvement":[152],"over":[153],"DDQ":[154],"baselines":[157],"both":[159],"simulation":[160],"human":[162],"evaluations.1":[163]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":7},{"year":2021,"cited_by_count":9},{"year":2020,"cited_by_count":13},{"year":2019,"cited_by_count":4}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
