{"id":"https://openalex.org/W2012712913","doi":"https://doi.org/10.1109/slt.2012.6424161","title":"Reinforcement learning for spoken dialogue systems using off-policy natural gradient method","display_name":"Reinforcement learning for spoken dialogue systems using off-policy natural gradient method","publication_year":2012,"publication_date":"2012-12-01","ids":{"openalex":"https://openalex.org/W2012712913","doi":"https://doi.org/10.1109/slt.2012.6424161","mag":"2012712913"},"language":"en","primary_location":{"id":"doi:10.1109/slt.2012.6424161","is_oa":false,"landing_page_url":"https://doi.org/10.1109/slt.2012.6424161","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2012 IEEE Spoken Language Technology Workshop (SLT)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5007735982","display_name":"Filip Jur\u010d\u00ed\u010dek","orcid":null},"institutions":[{"id":"https://openalex.org/I21250087","display_name":"Charles University","ror":"https://ror.org/024d6js02","country_code":"CZ","type":"education","lineage":["https://openalex.org/I21250087"]}],"countries":["CZ"],"is_corresponding":true,"raw_author_name":"Filip Jurcicek","raw_affiliation_strings":["Faculty of Mathematics and Physics, Charles University in Prague, Praha, Czech Republic","Faculty of Mathematics and Physics, Charles University in Prague Malostransk\u00e9 n\u00e1m\u011bst\u00ed 25, 118 00, Praha, Czech Republic"],"affiliations":[{"raw_affiliation_string":"Faculty of Mathematics and Physics, Charles University in Prague, Praha, Czech Republic","institution_ids":["https://openalex.org/I21250087"]},{"raw_affiliation_string":"Faculty of Mathematics and Physics, Charles University in Prague Malostransk\u00e9 n\u00e1m\u011bst\u00ed 25, 118 00, Praha, Czech Republic","institution_ids":["https://openalex.org/I21250087"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5007735982"],"corresponding_institution_ids":["https://openalex.org/I21250087"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.08404837,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"7","issue":null,"first_page":"7","last_page":"12"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12128","display_name":"AI in Service Interactions","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.9518689513206482},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7774404287338257},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6015320420265198},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.5985615253448486},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.5565407872200012},{"id":"https://openalex.org/keywords/natural","display_name":"Natural (archaeology)","score":0.5024383068084717},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.501950740814209},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.42508915066719055},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.34065568447113037},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.08483713865280151},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.0738297700881958}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.9518689513206482},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7774404287338257},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6015320420265198},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.5985615253448486},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.5565407872200012},{"id":"https://openalex.org/C2776608160","wikidata":"https://www.wikidata.org/wiki/Q4785462","display_name":"Natural (archaeology)","level":2,"score":0.5024383068084717},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.501950740814209},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.42508915066719055},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.34065568447113037},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.08483713865280151},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0738297700881958},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.0},{"id":"https://openalex.org/C111368507","wikidata":"https://www.wikidata.org/wiki/Q43518","display_name":"Oceanography","level":1,"score":0.0},{"id":"https://openalex.org/C66938386","wikidata":"https://www.wikidata.org/wiki/Q633538","display_name":"Structural engineering","level":1,"score":0.0},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/slt.2012.6424161","is_oa":false,"landing_page_url":"https://doi.org/10.1109/slt.2012.6424161","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2012 IEEE Spoken Language Technology Workshop (SLT)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.4099999964237213,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W178897730","https://openalex.org/W1778387566","https://openalex.org/W1970789124","https://openalex.org/W2015936967","https://openalex.org/W2021151961","https://openalex.org/W2037897789","https://openalex.org/W2040123554","https://openalex.org/W2115101920","https://openalex.org/W2119015791","https://openalex.org/W2119717200","https://openalex.org/W2130339357","https://openalex.org/W2130801532","https://openalex.org/W2134051188","https://openalex.org/W2154740693","https://openalex.org/W2168359464","https://openalex.org/W2594639291","https://openalex.org/W4299401133","https://openalex.org/W6679257226","https://openalex.org/W7046658373"],"related_works":["https://openalex.org/W2383111961","https://openalex.org/W2365952365","https://openalex.org/W2352448290","https://openalex.org/W4310083477","https://openalex.org/W2328553770","https://openalex.org/W2380820513","https://openalex.org/W2920061524","https://openalex.org/W2913146933","https://openalex.org/W2372385138","https://openalex.org/W4296359239"],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1,66],"methods":[2],"have":[3],"been":[4],"successfully":[5],"used":[6],"to":[7,37],"optimise":[8],"dialogue":[9,13,22,48,83,99,107],"strategies":[10],"in":[11,85],"statistical":[12],"systems.":[14],"Typically,":[15],"reinforcement":[16,42,65],"techniques":[17],"learn":[18],"on-policy":[19],"i.e.,":[20],"the":[21,28,86,94,104],"strategy":[23,49],"is":[24,30,40,78],"updated":[25],"online":[26],"while":[27],"system":[29,84],"interacting":[31],"with":[32],"a":[33,52,62,81,98],"user.":[34],"An":[35],"alternative":[36],"this":[38],"approach":[39],"off-policy":[41,64],"learning,":[43],"which":[44,101],"estimates":[45],"an":[46],"optimal":[47],"offline":[50],"from":[51],"fixed":[53],"corpus":[54],"of":[55],"previously":[56],"collected":[57],"dialogues.":[58],"This":[59],"paper":[60],"proposes":[61],"novel":[63],"method":[67,96],"based":[68],"on":[69,80],"natural":[70],"policy":[71],"gradients":[72],"and":[73],"importance":[74],"sampling.":[75],"The":[76,90],"algorithm":[77],"evaluated":[79],"spoken":[82],"tourist":[87],"information":[88],"domain.":[89],"experiments":[91],"indicate":[92],"that":[93],"proposed":[95],"learns":[97],"strategy,":[100],"significantly":[102],"outperforms":[103],"baseline":[105],"handcrafted":[106],"policy.":[108]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
