{"id":"https://openalex.org/W2410985346","doi":"https://doi.org/10.18653/v1/w16-3613","title":"Policy Networks with Two-Stage Training for Dialogue Systems","display_name":"Policy Networks with Two-Stage Training for Dialogue Systems","publication_year":2016,"publication_date":"2016-01-01","ids":{"openalex":"https://openalex.org/W2410985346","doi":"https://doi.org/10.18653/v1/w16-3613","mag":"2410985346"},"language":"en","primary_location":{"id":"doi:10.18653/v1/w16-3613","is_oa":false,"landing_page_url":"https://doi.org/10.18653/v1/w16-3613","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 17th Annual Meeting of the Special Interest Group\n          on Discourse and Dialogue","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1606.03152","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5070690406","display_name":"Mehdi Fatemi","orcid":null},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Mehdi Fatemi","raw_affiliation_strings":["Microsoft (United States), Redmond, United States"],"affiliations":[{"raw_affiliation_string":"Microsoft (United States), Redmond, United States","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049090525","display_name":"Layla El Asri","orcid":null},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Layla El Asri","raw_affiliation_strings":["Georgia Institute of Technology, Atlanta, United States"],"affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology, Atlanta, United States","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088078468","display_name":"Hannes Schulz","orcid":"https://orcid.org/0000-0001-6408-9794"},"institutions":[{"id":"https://openalex.org/I135140700","display_name":"University of Bonn","ror":"https://ror.org/041nas322","country_code":"DE","type":"education","lineage":["https://openalex.org/I135140700"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Hannes Schulz","raw_affiliation_strings":["University of Bonn, Bonn, Germany"],"affiliations":[{"raw_affiliation_string":"University of Bonn, Bonn, Germany","institution_ids":["https://openalex.org/I135140700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081879366","display_name":"Jing He","orcid":"https://orcid.org/0000-0001-6488-1052"},"institutions":[{"id":"https://openalex.org/I154099455","display_name":"Shandong University","ror":"https://ror.org/0207yh398","country_code":"CN","type":"education","lineage":["https://openalex.org/I154099455"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jing He","raw_affiliation_strings":["Shandong University, Jinan, China"],"affiliations":[{"raw_affiliation_string":"Shandong University, Jinan, China","institution_ids":["https://openalex.org/I154099455"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5103376786","display_name":"Kaheer Suleman","orcid":null},"institutions":[{"id":"https://openalex.org/I151746483","display_name":"University of Waterloo","ror":"https://ror.org/01aff2v68","country_code":"CA","type":"education","lineage":["https://openalex.org/I151746483"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Kaheer Suleman","raw_affiliation_strings":["University of Waterloo, Waterloo, Canada"],"affiliations":[{"raw_affiliation_string":"University of Waterloo, Waterloo, Canada","institution_ids":["https://openalex.org/I151746483"]}]}],"institutions":[],"countries_distinct_count":4,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5070690406"],"corresponding_institution_ids":["https://openalex.org/I1290206253"],"apc_list":null,"apc_paid":null,"fwci":6.6511,"has_fulltext":false,"cited_by_count":26,"citation_normalized_percentile":{"value":0.96767936,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"101","last_page":"110"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10444","display_name":"Context-Aware Activity Recognition Systems","score":0.9904999732971191,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9854999780654907,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8120343685150146},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8071798086166382},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6850102543830872},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.6544995307922363},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.5794534087181091},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.5639310479164124},{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.5182549953460693},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.5162582397460938},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.4963691830635071},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.46914997696876526},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.46885713934898376},{"id":"https://openalex.org/keywords/deep-neural-networks","display_name":"Deep neural networks","score":0.4480564296245575},{"id":"https://openalex.org/keywords/partially-observable-markov-decision-process","display_name":"Partially observable Markov decision process","score":0.43299925327301025},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4033547639846802},{"id":"https://openalex.org/keywords/markov-chain","display_name":"Markov chain","score":0.30316439270973206},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.24920272827148438},{"id":"https://openalex.org/keywords/markov-model","display_name":"Markov model","score":0.20426130294799805},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.1404285430908203},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.09323570132255554},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.0720856785774231},{"id":"https://openalex.org/keywords/software-engineering","display_name":"Software engineering","score":0.07113710045814514}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8120343685150146},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8071798086166382},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6850102543830872},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.6544995307922363},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.5794534087181091},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.5639310479164124},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.5182549953460693},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.5162582397460938},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.4963691830635071},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.46914997696876526},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.46885713934898376},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.4480564296245575},{"id":"https://openalex.org/C17098449","wikidata":"https://www.wikidata.org/wiki/Q176814","display_name":"Partially observable Markov decision process","level":4,"score":0.43299925327301025},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4033547639846802},{"id":"https://openalex.org/C98763669","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov chain","level":2,"score":0.30316439270973206},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.24920272827148438},{"id":"https://openalex.org/C163836022","wikidata":"https://www.wikidata.org/wiki/Q6771326","display_name":"Markov model","level":3,"score":0.20426130294799805},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.1404285430908203},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.09323570132255554},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0720856785774231},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.07113710045814514},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.18653/v1/w16-3613","is_oa":false,"landing_page_url":"https://doi.org/10.18653/v1/w16-3613","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 17th Annual Meeting of the Special Interest Group\n          on Discourse and Dialogue","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1606.03152","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1606.03152","pdf_url":"https://arxiv.org/pdf/1606.03152","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"mag:2410985346","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/1606.03152.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.1606.03152","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1606.03152","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article-journal"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1606.03152","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1606.03152","pdf_url":"https://arxiv.org/pdf/1606.03152","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.7799999713897705}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W37080510","https://openalex.org/W903399924","https://openalex.org/W1569296262","https://openalex.org/W1595483645","https://openalex.org/W1757796397","https://openalex.org/W1757949796","https://openalex.org/W1778387566","https://openalex.org/W1975244201","https://openalex.org/W1987326241","https://openalex.org/W1994923984","https://openalex.org/W2054716580","https://openalex.org/W2062175565","https://openalex.org/W2084799336","https://openalex.org/W2094387729","https://openalex.org/W2102222436","https://openalex.org/W2117989772","https://openalex.org/W2120045257","https://openalex.org/W2145339207","https://openalex.org/W2155027007","https://openalex.org/W2156974606","https://openalex.org/W2167224731","https://openalex.org/W2174196774","https://openalex.org/W2175723363","https://openalex.org/W2251058040","https://openalex.org/W2257979135","https://openalex.org/W2260756217","https://openalex.org/W2438667436","https://openalex.org/W2914656440","https://openalex.org/W2949430050","https://openalex.org/W2952523895","https://openalex.org/W2964043796","https://openalex.org/W3089091950"],"related_works":["https://openalex.org/W2963993502","https://openalex.org/W2145339207","https://openalex.org/W2438667436","https://openalex.org/W2257979135","https://openalex.org/W1975244201","https://openalex.org/W2412899141","https://openalex.org/W2410983263","https://openalex.org/W1757796397","https://openalex.org/W2571927164","https://openalex.org/W2417401578","https://openalex.org/W2168490009","https://openalex.org/W2163068732","https://openalex.org/W2781726626","https://openalex.org/W2936107880","https://openalex.org/W2126152565","https://openalex.org/W3045402999","https://openalex.org/W2953981431","https://openalex.org/W3015731157","https://openalex.org/W3170914142","https://openalex.org/W3211540990"],"abstract_inverted_index":{"In":[0,59,159],"this":[1],"paper,":[2],"we":[3,24,70],"propose":[4],"to":[5,47,61,65,98,102,162,171],"use":[6],"deep":[7,33,73,115,146,173],"policy":[8,165],"networks":[9],"which":[10,104],"are":[11,96,185],"trained":[12,78],"with":[13,133,140,180],"an":[14,120,163],"advantage":[15],"actor-critic":[16,121,145],"method":[17,117],"for":[18,108],"statistically":[19],"optimised":[20],"dialogue":[21],"systems.":[22],"First,":[23],"show":[25,71,112],"that,":[26],"on":[27,80,90,119,177,187],"summary":[28,68],"state":[29,42,83],"and":[30,43,56,84,156],"action":[31,44,85],"spaces,":[32,69],"Reinforcement":[34],"Learning":[35],"(RL)":[36],"outperforms":[37],"Gaussian":[38],"Processes":[39],"methods.":[40],"Summary":[41],"spaces":[45],"lead":[46],"good":[48],"performance":[49],"but":[50],"require":[51,99],"pre-engineering":[52],"effort,":[53],"RL":[54,74,116,174],"knowledge,":[55],"domain":[57,190],"expertise.":[58],"order":[60],"remove":[62],"the":[63,81,144,178,193],"need":[64],"define":[66],"such":[67],"that":[72,113],"can":[75,123],"also":[76],"be":[77],"efficiently":[79],"original":[82],"spaces.":[86],"Dialogue":[87,194],"systems":[88],"based":[89,118],"partially":[91],"observable":[92],"Markov":[93],"decision":[94],"processes":[95],"known":[97],"many":[100],"dialogues":[101,138],"train,":[103],"makes":[105],"them":[106],"unappealing":[107],"practical":[109],"deployment.":[110],"We":[111],"a":[114,125,135,141,152,188],"architecture":[122],"exploit":[124],"small":[126],"amount":[127],"of":[128,154],"data":[129,179],"very":[130],"efficiently.":[131],"Indeed,":[132],"only":[134],"few":[136],"hundred":[137],"collected":[139],"handcrafted":[142],"policy,":[143],"learner":[147],"is":[148,166],"considerably":[149],"bootstrapped":[150],"from":[151,192],"combination":[153],"supervised":[155],"batch":[157,181],"RL.":[158,182],"addition,":[160],"convergence":[161],"optimal":[164],"significantly":[167],"sped":[168],"up":[169],"compared":[170],"other":[172],"methods":[175],"initialized":[176],"All":[183],"experiments":[184],"performed":[186],"restaurant":[189],"derived":[191],"State":[195],"Tracking":[196],"Challenge":[197],"2":[198],"(DSTC2)":[199],"dataset.":[200]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":6},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":5},{"year":2017,"cited_by_count":6},{"year":2016,"cited_by_count":2},{"year":2015,"cited_by_count":1}],"updated_date":"2026-03-10T16:38:18.471706","created_date":"2025-10-10T00:00:00"}
