{"id":"https://openalex.org/W2021151961","doi":"https://doi.org/10.1162/coli.2008.07-028-r2-05-82","title":"Hybrid Reinforcement/Supervised Learning of Dialogue Policies from Fixed Data Sets","display_name":"Hybrid Reinforcement/Supervised Learning of Dialogue Policies from Fixed Data Sets","publication_year":2008,"publication_date":"2008-07-16","ids":{"openalex":"https://openalex.org/W2021151961","doi":"https://doi.org/10.1162/coli.2008.07-028-r2-05-82","mag":"2021151961"},"language":"en","primary_location":{"id":"doi:10.1162/coli.2008.07-028-r2-05-82","is_oa":false,"landing_page_url":"https://doi.org/10.1162/coli.2008.07-028-r2-05-82","pdf_url":null,"source":{"id":"https://openalex.org/S155526855","display_name":"Computational Linguistics","issn_l":"0891-2017","issn":["0891-2017","1530-9312"],"is_oa":false,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computational Linguistics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doaj.org/article/0ab376b8b05c4a3bb74267591d8c6d39","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5084321238","display_name":"James Henderson","orcid":"https://orcid.org/0000-0003-3714-4799"},"institutions":[{"id":"https://openalex.org/I4210150799","display_name":"Battelle","ror":"https://ror.org/05vqnxk73","country_code":"CH","type":"nonprofit","lineage":["https://openalex.org/I4210150799"]},{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["CH","GB"],"is_corresponding":true,"raw_author_name":"James Henderson","raw_affiliation_strings":["* Universit\u00e9 de Gen\u00e8ave, D\u00e9partement d'Informatique, Battelle-b\u00e2timent A, 7 route de Drize, 1227 Carouge, Switzerland","** University of Edinburgh, 2 Buccleuch Place, Edinburgh EH8 9LW, UK., "],"affiliations":[{"raw_affiliation_string":"* Universit\u00e9 de Gen\u00e8ave, D\u00e9partement d'Informatique, Battelle-b\u00e2timent A, 7 route de Drize, 1227 Carouge, Switzerland","institution_ids":["https://openalex.org/I4210150799"]},{"raw_affiliation_string":"** University of Edinburgh, 2 Buccleuch Place, Edinburgh EH8 9LW, UK., ","institution_ids":["https://openalex.org/I98677209"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010949145","display_name":"Oliver Lemon","orcid":"https://orcid.org/0000-0001-9497-4743"},"institutions":[{"id":"https://openalex.org/I4210150799","display_name":"Battelle","ror":"https://ror.org/05vqnxk73","country_code":"CH","type":"nonprofit","lineage":["https://openalex.org/I4210150799"]},{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["CH","GB"],"is_corresponding":false,"raw_author_name":"Oliver Lemon","raw_affiliation_strings":["* Universit\u00e9 de Gen\u00e8ave, D\u00e9partement d'Informatique, Battelle-b\u00e2timent A, 7 route de Drize, 1227 Carouge, Switzerland","** University of Edinburgh, 2 Buccleuch Place, Edinburgh EH8 9LW, UK., "],"affiliations":[{"raw_affiliation_string":"* Universit\u00e9 de Gen\u00e8ave, D\u00e9partement d'Informatique, Battelle-b\u00e2timent A, 7 route de Drize, 1227 Carouge, Switzerland","institution_ids":["https://openalex.org/I4210150799"]},{"raw_affiliation_string":"** University of Edinburgh, 2 Buccleuch Place, Edinburgh EH8 9LW, UK., ","institution_ids":["https://openalex.org/I98677209"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5112401163","display_name":"Kallirroi Georgila","orcid":null},"institutions":[{"id":"https://openalex.org/I4210150799","display_name":"Battelle","ror":"https://ror.org/05vqnxk73","country_code":"CH","type":"nonprofit","lineage":["https://openalex.org/I4210150799"]},{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["CH","GB"],"is_corresponding":false,"raw_author_name":"Kallirroi Georgila","raw_affiliation_strings":["* Universit\u00e9 de Gen\u00e8ave, D\u00e9partement d'Informatique, Battelle-b\u00e2timent A, 7 route de Drize, 1227 Carouge, Switzerland","** University of Edinburgh, 2 Buccleuch Place, Edinburgh EH8 9LW, UK., "],"affiliations":[{"raw_affiliation_string":"* Universit\u00e9 de Gen\u00e8ave, D\u00e9partement d'Informatique, Battelle-b\u00e2timent A, 7 route de Drize, 1227 Carouge, Switzerland","institution_ids":["https://openalex.org/I4210150799"]},{"raw_affiliation_string":"** University of Edinburgh, 2 Buccleuch Place, Edinburgh EH8 9LW, UK., ","institution_ids":["https://openalex.org/I98677209"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5084321238"],"corresponding_institution_ids":["https://openalex.org/I4210150799","https://openalex.org/I98677209"],"apc_list":null,"apc_paid":null,"fwci":18.9737,"has_fulltext":false,"cited_by_count":121,"citation_normalized_percentile":{"value":0.99247004,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":"34","issue":"4","first_page":"487","last_page":"511"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10456","display_name":"Multi-Agent Systems and Negotiation","score":0.9933000206947327,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.915477454662323},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8227212429046631},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6516709327697754},{"id":"https://openalex.org/keywords/bootstrapping","display_name":"Bootstrapping (finance)","score":0.651328444480896},{"id":"https://openalex.org/keywords/state-space","display_name":"State space","score":0.6178463697433472},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.6145367622375488},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5728774666786194},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5254824757575989},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.49112364649772644},{"id":"https://openalex.org/keywords/supervised-learning","display_name":"Supervised learning","score":0.4721945524215698},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.45977553725242615},{"id":"https://openalex.org/keywords/data-set","display_name":"Data set","score":0.45547911524772644},{"id":"https://openalex.org/keywords/semi-supervised-learning","display_name":"Semi-supervised learning","score":0.44224902987480164},{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.4343258738517761},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.10820549726486206},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.0987364649772644},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.08112242817878723}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.915477454662323},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8227212429046631},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6516709327697754},{"id":"https://openalex.org/C207609745","wikidata":"https://www.wikidata.org/wiki/Q4944086","display_name":"Bootstrapping (finance)","level":2,"score":0.651328444480896},{"id":"https://openalex.org/C72434380","wikidata":"https://www.wikidata.org/wiki/Q230930","display_name":"State space","level":2,"score":0.6178463697433472},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.6145367622375488},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5728774666786194},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5254824757575989},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.49112364649772644},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.4721945524215698},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.45977553725242615},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.45547911524772644},{"id":"https://openalex.org/C58973888","wikidata":"https://www.wikidata.org/wiki/Q1041418","display_name":"Semi-supervised learning","level":2,"score":0.44224902987480164},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.4343258738517761},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.10820549726486206},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.0987364649772644},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.08112242817878723},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1162/coli.2008.07-028-r2-05-82","is_oa":false,"landing_page_url":"https://doi.org/10.1162/coli.2008.07-028-r2-05-82","pdf_url":null,"source":{"id":"https://openalex.org/S155526855","display_name":"Computational Linguistics","issn_l":"0891-2017","issn":["0891-2017","1530-9312"],"is_oa":false,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computational Linguistics","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:0ab376b8b05c4a3bb74267591d8c6d39","is_oa":true,"landing_page_url":"https://doaj.org/article/0ab376b8b05c4a3bb74267591d8c6d39","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Computational Linguistics, Vol 34, Iss 4 (2021)","raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:doaj.org/article:0ab376b8b05c4a3bb74267591d8c6d39","is_oa":true,"landing_page_url":"https://doaj.org/article/0ab376b8b05c4a3bb74267591d8c6d39","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Computational Linguistics, Vol 34, Iss 4 (2021)","raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.550000011920929,"display_name":"Peace, Justice and strong institutions"}],"awards":[{"id":"https://openalex.org/G1904389866","display_name":null,"funder_award_id":"EP/E019501/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"}],"funders":[{"id":"https://openalex.org/F4320311904","display_name":"Wellcome Trust","ror":"https://ror.org/029chgv08"},{"id":"https://openalex.org/F4320320300","display_name":"European Commission","ror":"https://ror.org/00k4n6c32"},{"id":"https://openalex.org/F4320334627","display_name":"Engineering and Physical Sciences Research Council","ror":"https://ror.org/0439y7842"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":47,"referenced_works":["https://openalex.org/W13088146","https://openalex.org/W62710299","https://openalex.org/W80526705","https://openalex.org/W117128830","https://openalex.org/W120790241","https://openalex.org/W147552783","https://openalex.org/W160067033","https://openalex.org/W200223693","https://openalex.org/W203030180","https://openalex.org/W1502662603","https://openalex.org/W1503789224","https://openalex.org/W1510073064","https://openalex.org/W1515851193","https://openalex.org/W1530049455","https://openalex.org/W1550220533","https://openalex.org/W1592751638","https://openalex.org/W1600735390","https://openalex.org/W1681299129","https://openalex.org/W2007221309","https://openalex.org/W2039533743","https://openalex.org/W2047192163","https://openalex.org/W2080887205","https://openalex.org/W2083205357","https://openalex.org/W2096145771","https://openalex.org/W2099118758","https://openalex.org/W2101308260","https://openalex.org/W2101445408","https://openalex.org/W2105303215","https://openalex.org/W2106482783","https://openalex.org/W2112259378","https://openalex.org/W2117012949","https://openalex.org/W2117398305","https://openalex.org/W2132997613","https://openalex.org/W2134040603","https://openalex.org/W2134051188","https://openalex.org/W2134466368","https://openalex.org/W2141839844","https://openalex.org/W2144010450","https://openalex.org/W2159250980","https://openalex.org/W2160219061","https://openalex.org/W2163068732","https://openalex.org/W2165574458","https://openalex.org/W2168490009","https://openalex.org/W2171930577","https://openalex.org/W2492794003","https://openalex.org/W2603612888","https://openalex.org/W2911283634"],"related_works":["https://openalex.org/W1586607209","https://openalex.org/W122912556","https://openalex.org/W4312414840","https://openalex.org/W2621411691","https://openalex.org/W2271357838","https://openalex.org/W2556866732","https://openalex.org/W2328989934","https://openalex.org/W2348322200","https://openalex.org/W2981952041","https://openalex.org/W3148060700"],"abstract_inverted_index":{"We":[0,120],"propose":[1,75],"a":[2,10,32,35,42,48,76,93,133,176,181,193,199],"method":[3,15,148,230],"for":[4,115,167,234],"learning":[5,82,88,101,196,202],"dialogue":[6,25,33,96,240],"management":[7,241],"policies":[8,242],"from":[9,132,243],"fixed":[11,58,134],"data":[12,59,137,187,246],"set.":[13],"The":[14,86,228],"addresses":[16],"the":[17,29,54,99,106,110,128,144,158,185,207,211,221],"challenges":[18],"posed":[19],"by":[20,226],"Information":[21,171],"State":[22],"Update":[23],"(ISU)-based":[24],"systems,":[26],"which":[27,116,162],"represent":[28],"state":[30,45,70,140],"of":[31,38,68,95,112,136,146,184,239],"as":[34],"large":[36,44,139],"set":[37,60],"features,":[39],"resulting":[40],"in":[41],"very":[43],"space":[46],"and":[47,71,170,198,236],"huge":[49],"policy":[50,72,108,225],"space.":[51],"To":[52,142],"address":[53,127],"problem":[55],"that":[56,79],"any":[57],"will":[61,231],"only":[62],"provide":[63],"information":[64],"about":[65],"small":[66],"portions":[67,111],"these":[69,113],"spaces,":[73],"we":[74,117,153,163],"hybrid":[77,190],"model":[78,156,191,197],"combines":[80],"reinforcement":[81,87,201],"with":[83,175],"supervised":[84,100,195],"learning.":[85],"is":[89,102],"used":[90,103],"to":[91,104,109,126,130,138,161,215],"optimize":[92],"measure":[94],"reward,":[97],"while":[98],"restrict":[105],"learned":[107],"spaces":[114],"have":[118,164],"data.":[119],"also":[121,205],"use":[122],"linear":[123],"function":[124],"approximation":[125],"need":[129],"generalize":[131],"amount":[135],"spaces.":[141],"demonstrate":[143],"effectiveness":[145],"this":[147,150,155],"on":[149,157,180,210],"challenging":[151],"task,":[152],"trained":[154,179],"COMMUNICATOR":[159,212,223],"corpus,":[160],"added":[165],"annotations":[166],"user":[168,177],"actions":[169],"States.":[172],"When":[173],"tested":[174],"simulation":[178],"different":[182],"part":[183],"same":[186],"set,":[188],"our":[189],"outperforms":[192,206],"pure":[194,200],"model.":[203],"It":[204],"hand-crafted":[208],"systems":[209],"data,":[213],"according":[214],"automatic":[216,237],"evaluation":[217],"measures,":[218],"improving":[219],"over":[220],"average":[222],"system":[224],"10%.":[227],"proposed":[229],"improve":[232],"techniques":[233],"bootstrapping":[235],"optimization":[238],"limited":[244],"initial":[245],"sets.":[247]},"counts_by_year":[{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":6},{"year":2020,"cited_by_count":7},{"year":2019,"cited_by_count":8},{"year":2018,"cited_by_count":3},{"year":2017,"cited_by_count":4},{"year":2016,"cited_by_count":5},{"year":2015,"cited_by_count":4},{"year":2014,"cited_by_count":16},{"year":2013,"cited_by_count":13},{"year":2012,"cited_by_count":14}],"updated_date":"2026-04-04T08:04:53.788161","created_date":"2025-10-10T00:00:00"}
