{"id":"https://openalex.org/W3016142228","doi":"https://doi.org/10.1109/icassp40776.2020.9053235","title":"Improving Sample-Efficiency in Reinforcement Learning for Dialogue Systems by Using Trainable-Action-Mask","display_name":"Improving Sample-Efficiency in Reinforcement Learning for Dialogue Systems by Using Trainable-Action-Mask","publication_year":2020,"publication_date":"2020-04-09","ids":{"openalex":"https://openalex.org/W3016142228","doi":"https://doi.org/10.1109/icassp40776.2020.9053235","mag":"3016142228"},"language":"en","primary_location":{"id":"doi:10.1109/icassp40776.2020.9053235","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp40776.2020.9053235","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5080190170","display_name":"Yen-Chen Wu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210096386","display_name":"Bridge University","ror":"https://ror.org/00cbm0437","country_code":"SS","type":"education","lineage":["https://openalex.org/I4210096386"]},{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]}],"countries":["GB","SS"],"is_corresponding":true,"raw_author_name":"Yen-Chen Wu","raw_affiliation_strings":["Cambridge University"],"affiliations":[{"raw_affiliation_string":"Cambridge University","institution_ids":["https://openalex.org/I4210096386","https://openalex.org/I241749"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039215196","display_name":"Bo-Hsiang Tseng","orcid":null},"institutions":[{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]},{"id":"https://openalex.org/I4210096386","display_name":"Bridge University","ror":"https://ror.org/00cbm0437","country_code":"SS","type":"education","lineage":["https://openalex.org/I4210096386"]}],"countries":["GB","SS"],"is_corresponding":false,"raw_author_name":"Bo-Hsiang Tseng","raw_affiliation_strings":["Cambridge University"],"affiliations":[{"raw_affiliation_string":"Cambridge University","institution_ids":["https://openalex.org/I4210096386","https://openalex.org/I241749"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5108711177","display_name":"Carl Edward Rasmussen","orcid":null},"institutions":[{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]},{"id":"https://openalex.org/I4210096386","display_name":"Bridge University","ror":"https://ror.org/00cbm0437","country_code":"SS","type":"education","lineage":["https://openalex.org/I4210096386"]}],"countries":["GB","SS"],"is_corresponding":false,"raw_author_name":"Carl Edward Rasmussen","raw_affiliation_strings":["Cambridge University"],"affiliations":[{"raw_affiliation_string":"Cambridge University","institution_ids":["https://openalex.org/I4210096386","https://openalex.org/I241749"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5080190170"],"corresponding_institution_ids":["https://openalex.org/I241749","https://openalex.org/I4210096386"],"apc_list":null,"apc_paid":null,"fwci":0.5302,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.71728977,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"8024","last_page":"8028"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8547754287719727},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8131687641143799},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.7331409454345703},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6804119944572449},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.5781065225601196},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5640389323234558},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.5051911473274231},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4609990119934082},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4366152286529541},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.43319687247276306},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.4276145100593567}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8547754287719727},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8131687641143799},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.7331409454345703},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6804119944572449},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.5781065225601196},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5640389323234558},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.5051911473274231},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4609990119934082},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4366152286529541},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.43319687247276306},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.4276145100593567},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C111368507","wikidata":"https://www.wikidata.org/wiki/Q43518","display_name":"Oceanography","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp40776.2020.9053235","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp40776.2020.9053235","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":40,"referenced_works":["https://openalex.org/W1491843047","https://openalex.org/W1757796397","https://openalex.org/W1758031947","https://openalex.org/W1771410628","https://openalex.org/W1975244201","https://openalex.org/W2035934535","https://openalex.org/W2062175565","https://openalex.org/W2120045257","https://openalex.org/W2257979135","https://openalex.org/W2290354866","https://openalex.org/W2396229782","https://openalex.org/W2559038528","https://openalex.org/W2567374473","https://openalex.org/W2739936944","https://openalex.org/W2772217324","https://openalex.org/W2783543950","https://openalex.org/W2798494119","https://openalex.org/W2889186204","https://openalex.org/W2950314731","https://openalex.org/W2950471160","https://openalex.org/W2950517718","https://openalex.org/W2962872206","https://openalex.org/W2963064439","https://openalex.org/W2963604043","https://openalex.org/W2964077562","https://openalex.org/W2964220198","https://openalex.org/W3021208093","https://openalex.org/W4297789121","https://openalex.org/W4297806413","https://openalex.org/W4298857966","https://openalex.org/W6637967152","https://openalex.org/W6638018090","https://openalex.org/W6638058698","https://openalex.org/W6692405165","https://openalex.org/W6696324988","https://openalex.org/W6730098006","https://openalex.org/W6731227521","https://openalex.org/W6740836278","https://openalex.org/W6746188011","https://openalex.org/W6747812028"],"related_works":["https://openalex.org/W2383111961","https://openalex.org/W2365952365","https://openalex.org/W2352448290","https://openalex.org/W2380820513","https://openalex.org/W2913146933","https://openalex.org/W2372385138","https://openalex.org/W4296359239","https://openalex.org/W2101155126","https://openalex.org/W2043093291","https://openalex.org/W2363545964"],"abstract_inverted_index":{"By":[0],"interacting":[1],"with":[2,101],"human":[3],"and":[4,53,110],"learning":[5,10,35],"from":[6,79],"reward":[7],"signals,":[8],"reinforcement":[9,34],"is":[11],"an":[12],"ideal":[13],"way":[14],"to":[15,48,67,104,115],"build":[16],"conversational":[17],"AI.":[18],"Concerning":[19],"the":[20,29,55,59,102,106],"expenses":[21],"of":[22,97],"real-users'":[23],"responses,":[24],"improving":[25],"sample-efficiency":[26],"has":[27],"been":[28],"key":[30],"issue":[31],"when":[32],"applying":[33],"in":[36,89],"real-world":[37],"spoken":[38],"dialogue":[39],"systems":[40],"(SDS).":[41],"Handcrafted":[42],"action":[43,61],"masks":[44],"are":[45],"commonly":[46],"used":[47],"rule":[49],"out":[50],"impossible":[51],"actions":[52],"accelerate":[54],"training":[56,98],"process.":[57],"However,":[58],"handcrafted":[60],"mask":[62],"can":[63],"barely":[64],"be":[65],"generalized":[66],"unseen":[68],"domains.":[69],"In":[70,86],"this":[71],"paper,":[72],"we":[73],"propose":[74],"trainable-action-mask":[75],"(TAM)":[76],"which":[77],"learns":[78],"data":[80],"automatically":[81],"without":[82],"handcrafting":[83],"complicated":[84],"rules.":[85],"our":[87],"experiments":[88],"Cambridge":[90],"Restaurant":[91],"domain,":[92],"TAM":[93],"requires":[94],"only":[95],"30%":[96],"data,":[99],"compared":[100],"baseline,":[103],"reach":[105],"80%":[107],"success":[108],"rate":[109],"it":[111],"also":[112],"shows":[113],"robustness":[114],"noisy":[116],"environments.":[117]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
