{"id":"https://openalex.org/W2963043030","doi":"https://doi.org/10.1109/asru.2017.8268975","title":"Iterative policy learning in end-to-end trainable task-oriented neural dialog models","display_name":"Iterative policy learning in end-to-end trainable task-oriented neural dialog models","publication_year":2017,"publication_date":"2017-12-01","ids":{"openalex":"https://openalex.org/W2963043030","doi":"https://doi.org/10.1109/asru.2017.8268975","mag":"2963043030"},"language":"en","primary_location":{"id":"doi:10.1109/asru.2017.8268975","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru.2017.8268975","pdf_url":null,"source":{"id":"https://openalex.org/S4306498158","display_name":"2017 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100339921","display_name":"Bing Liu","orcid":"https://orcid.org/0000-0002-2365-6606"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Bing Liu","raw_affiliation_strings":["Electrical and Computer Engineering, Carnegie Mellon University"],"affiliations":[{"raw_affiliation_string":"Electrical and Computer Engineering, Carnegie Mellon University","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5028182466","display_name":"Ian Lane","orcid":null},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ian Lane","raw_affiliation_strings":["Language Technologies Institute, Carnegie Mellon University"],"affiliations":[{"raw_affiliation_string":"Language Technologies Institute, Carnegie Mellon University","institution_ids":["https://openalex.org/I74973139"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5100339921"],"corresponding_institution_ids":["https://openalex.org/I74973139"],"apc_list":null,"apc_paid":null,"fwci":11.9269,"has_fulltext":false,"cited_by_count":93,"citation_normalized_percentile":{"value":0.98766224,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"482","last_page":"489"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/dialog-box","display_name":"Dialog box","score":0.9796693325042725},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8547363877296448},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.7533520460128784},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7197286486625671},{"id":"https://openalex.org/keywords/dialog-system","display_name":"Dialog system","score":0.6808313727378845},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6278437972068787},{"id":"https://openalex.org/keywords/policy-learning","display_name":"Policy learning","score":0.533370852470398},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5020337104797363},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4303261935710907},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.40105515718460083},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.06768512725830078},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.06324982643127441}],"concepts":[{"id":"https://openalex.org/C173853756","wikidata":"https://www.wikidata.org/wiki/Q86915","display_name":"Dialog box","level":2,"score":0.9796693325042725},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8547363877296448},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.7533520460128784},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7197286486625671},{"id":"https://openalex.org/C190954187","wikidata":"https://www.wikidata.org/wiki/Q5270587","display_name":"Dialog system","level":3,"score":0.6808313727378845},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6278437972068787},{"id":"https://openalex.org/C2779436431","wikidata":"https://www.wikidata.org/wiki/Q30672407","display_name":"Policy learning","level":2,"score":0.533370852470398},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5020337104797363},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4303261935710907},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.40105515718460083},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.06768512725830078},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.06324982643127441},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/asru.2017.8268975","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru.2017.8268975","pdf_url":null,"source":{"id":"https://openalex.org/S4306498158","display_name":"2017 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.44999998807907104,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":55,"referenced_works":["https://openalex.org/W140747314","https://openalex.org/W160067033","https://openalex.org/W178897730","https://openalex.org/W311892248","https://openalex.org/W889023230","https://openalex.org/W1522301498","https://openalex.org/W1591706642","https://openalex.org/W1975244201","https://openalex.org/W1987326241","https://openalex.org/W2021151961","https://openalex.org/W2037897789","https://openalex.org/W2047335008","https://openalex.org/W2064675550","https://openalex.org/W2095705004","https://openalex.org/W2119717200","https://openalex.org/W2137871902","https://openalex.org/W2152342063","https://openalex.org/W2168490009","https://openalex.org/W2170323078","https://openalex.org/W2250297846","https://openalex.org/W2251058040","https://openalex.org/W2251149342","https://openalex.org/W2252140739","https://openalex.org/W2311783643","https://openalex.org/W2412715517","https://openalex.org/W2412899141","https://openalex.org/W2432549722","https://openalex.org/W2468710617","https://openalex.org/W2473965551","https://openalex.org/W2571927164","https://openalex.org/W2594726847","https://openalex.org/W2749436976","https://openalex.org/W2807142242","https://openalex.org/W2949252816","https://openalex.org/W2962682659","https://openalex.org/W2962776342","https://openalex.org/W2962883855","https://openalex.org/W2963050422","https://openalex.org/W2963064439","https://openalex.org/W2963167310","https://openalex.org/W2963412005","https://openalex.org/W2963797754","https://openalex.org/W2963963856","https://openalex.org/W2964044380","https://openalex.org/W2964101860","https://openalex.org/W2964121744","https://openalex.org/W2964352131","https://openalex.org/W4295249402","https://openalex.org/W6631190155","https://openalex.org/W6635590879","https://openalex.org/W6691139419","https://openalex.org/W6712785310","https://openalex.org/W6715475388","https://openalex.org/W6734696739","https://openalex.org/W6752572083"],"related_works":["https://openalex.org/W2500779211","https://openalex.org/W48079147","https://openalex.org/W326836678","https://openalex.org/W1963944933","https://openalex.org/W2563921006","https://openalex.org/W1600043506","https://openalex.org/W2111550420","https://openalex.org/W3133451788","https://openalex.org/W2755402024","https://openalex.org/W2963043030"],"abstract_inverted_index":{"In":[0],"this":[1,60],"paper,":[2],"we":[3],"present":[4],"a":[5,31,37,41,54,85,90],"deep":[6,73,123],"reinforcement":[7],"learning":[8,24,95],"(RL)":[9],"framework":[10],"for":[11],"iterative":[12],"dialog":[13,19,25,32,56,66,87,98,127],"policy":[14,26],"optimization":[15],"in":[16,23],"end-to-end":[17],"task-oriented":[18,115],"systems.":[20],"Popular":[21],"approaches":[22],"with":[27,72,100,122,135],"RL":[28,74,170],"include":[29],"letting":[30,109],"agent":[33,67,88,128],"to":[34,113,153,165],"learn":[35],"against":[36],"user":[38,43,70,92,131],"simulator.":[39],"Building":[40],"reliable":[42],"simulator,":[44],"however,":[45],"is":[46],"not":[47],"trivial,":[48],"often":[49],"as":[50,52],"difficult":[51],"building":[53],"good":[55],"agent.":[57],"We":[58,82,103],"address":[59],"challenge":[61],"by":[62,75,94,108],"jointly":[63],"optimizing":[64,119],"the":[65,69,79,110,126,130,149],"and":[68,89,117,129,160,168],"simulator":[71,93,132],"simulating":[76],"dialogs":[77,116],"between":[78],"two":[80,111],"agents.":[81],"first":[83],"bootstrap":[84],"basic":[86,91],"directly":[96],"from":[97],"corpora":[99],"supervised":[101,166],"training.":[102],"then":[104],"improve":[105],"them":[106],"further":[107],"agents":[112],"conduct":[114],"iteratively":[118],"their":[120],"policies":[121],"RL.":[124],"Both":[125],"are":[133],"designed":[134],"neural":[136],"network":[137],"models":[138],"that":[139,148],"can":[140],"be":[141],"trained":[142],"end-to-end.":[143],"Our":[144],"experiment":[145],"results":[146],"show":[147],"proposed":[150],"method":[151],"leads":[152],"promising":[154],"improvements":[155],"on":[156],"task":[157,162],"success":[158],"rate":[159],"total":[161],"reward":[163],"comparing":[164],"training":[167,171],"single-agent":[169],"baseline":[172],"models.":[173]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":9},{"year":2022,"cited_by_count":8},{"year":2021,"cited_by_count":13},{"year":2020,"cited_by_count":22},{"year":2019,"cited_by_count":18},{"year":2018,"cited_by_count":17}],"updated_date":"2026-04-04T08:04:53.788161","created_date":"2025-10-10T00:00:00"}