{"id":"https://openalex.org/W2152342063","doi":"https://doi.org/10.3115/v1/p14-1047","title":"Single-Agent vs. Multi-Agent Techniques for Concurrent Reinforcement Learning of Negotiation Dialogue Policies","display_name":"Single-Agent vs. Multi-Agent Techniques for Concurrent Reinforcement Learning of Negotiation Dialogue Policies","publication_year":2014,"publication_date":"2014-01-01","ids":{"openalex":"https://openalex.org/W2152342063","doi":"https://doi.org/10.3115/v1/p14-1047","mag":"2152342063"},"language":"en","primary_location":{"id":"doi:10.3115/v1/p14-1047","is_oa":true,"landing_page_url":"https://doi.org/10.3115/v1/p14-1047","pdf_url":"https://aclanthology.org/P14-1047.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 52nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/P14-1047.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5112401163","display_name":"Kallirroi Georgila","orcid":null},"institutions":[{"id":"https://openalex.org/I4210087747","display_name":"Creative Technologies (United States)","ror":"https://ror.org/001qkb777","country_code":"US","type":"company","lineage":["https://openalex.org/I4210087747"]},{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Kallirroi Georgila","raw_affiliation_strings":["University of Southern California Institute for Creative Technologies 12015 Waterfront Drive, Playa Vista, CA 90094, USA"],"affiliations":[{"raw_affiliation_string":"University of Southern California Institute for Creative Technologies 12015 Waterfront Drive, Playa Vista, CA 90094, USA","institution_ids":["https://openalex.org/I4210087747","https://openalex.org/I1174212"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041595625","display_name":"Claire Nelson","orcid":null},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]},{"id":"https://openalex.org/I4210087747","display_name":"Creative Technologies (United States)","ror":"https://ror.org/001qkb777","country_code":"US","type":"company","lineage":["https://openalex.org/I4210087747"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Claire Nelson","raw_affiliation_strings":["University of Southern California Institute for Creative Technologies 12015 Waterfront Drive, Playa Vista, CA 90094, USA"],"affiliations":[{"raw_affiliation_string":"University of Southern California Institute for Creative Technologies 12015 Waterfront Drive, Playa Vista, CA 90094, USA","institution_ids":["https://openalex.org/I4210087747","https://openalex.org/I1174212"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5004384107","display_name":"David Traum","orcid":null},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]},{"id":"https://openalex.org/I4210087747","display_name":"Creative Technologies (United States)","ror":"https://ror.org/001qkb777","country_code":"US","type":"company","lineage":["https://openalex.org/I4210087747"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"David Traum","raw_affiliation_strings":["University of Southern California Institute for Creative Technologies 12015 Waterfront Drive, Playa Vista, CA 90094, USA"],"affiliations":[{"raw_affiliation_string":"University of Southern California Institute for Creative Technologies 12015 Waterfront Drive, Playa Vista, CA 90094, USA","institution_ids":["https://openalex.org/I4210087747","https://openalex.org/I1174212"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5112401163"],"corresponding_institution_ids":["https://openalex.org/I1174212","https://openalex.org/I4210087747"],"apc_list":null,"apc_paid":null,"fwci":5.4969,"has_fulltext":true,"cited_by_count":32,"citation_normalized_percentile":{"value":0.95953434,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"500","last_page":"510"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10456","display_name":"Multi-Agent Systems and Negotiation","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9955000281333923,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8864923119544983},{"id":"https://openalex.org/keywords/negotiation","display_name":"Negotiation","score":0.8210203051567078},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7487138509750366},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5207188725471497},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.49912166595458984},{"id":"https://openalex.org/keywords/resource-allocation","display_name":"Resource allocation","score":0.4845663607120514},{"id":"https://openalex.org/keywords/climbing","display_name":"Climbing","score":0.4841066598892212},{"id":"https://openalex.org/keywords/policy-learning","display_name":"Policy learning","score":0.45487555861473083},{"id":"https://openalex.org/keywords/multi-agent-system","display_name":"Multi-agent system","score":0.428579717874527},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.41644200682640076},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3663468360900879},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.08124101161956787}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8864923119544983},{"id":"https://openalex.org/C199776023","wikidata":"https://www.wikidata.org/wiki/Q202875","display_name":"Negotiation","level":2,"score":0.8210203051567078},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7487138509750366},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5207188725471497},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.49912166595458984},{"id":"https://openalex.org/C29202148","wikidata":"https://www.wikidata.org/wiki/Q287260","display_name":"Resource allocation","level":2,"score":0.4845663607120514},{"id":"https://openalex.org/C95038775","wikidata":"https://www.wikidata.org/wiki/Q22857","display_name":"Climbing","level":2,"score":0.4841066598892212},{"id":"https://openalex.org/C2779436431","wikidata":"https://www.wikidata.org/wiki/Q30672407","display_name":"Policy learning","level":2,"score":0.45487555861473083},{"id":"https://openalex.org/C41550386","wikidata":"https://www.wikidata.org/wiki/Q529909","display_name":"Multi-agent system","level":2,"score":0.428579717874527},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.41644200682640076},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3663468360900879},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.08124101161956787},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C66938386","wikidata":"https://www.wikidata.org/wiki/Q633538","display_name":"Structural engineering","level":1,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.3115/v1/p14-1047","is_oa":true,"landing_page_url":"https://doi.org/10.3115/v1/p14-1047","pdf_url":"https://aclanthology.org/P14-1047.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 52nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.3115/v1/p14-1047","is_oa":true,"landing_page_url":"https://doi.org/10.3115/v1/p14-1047","pdf_url":"https://aclanthology.org/P14-1047.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 52nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G685736895","display_name":"RI: Small: Reinforcement Learning for Realistic Statistical Spoken Dialogue Systems - Beyond Slot-Filling Applications","funder_award_id":"1117313","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2152342063.pdf","grobid_xml":"https://content.openalex.org/works/W2152342063.grobid-xml"},"referenced_works_count":44,"referenced_works":["https://openalex.org/W62710299","https://openalex.org/W311892248","https://openalex.org/W1211946649","https://openalex.org/W1513468570","https://openalex.org/W1524881148","https://openalex.org/W1542941925","https://openalex.org/W1681299129","https://openalex.org/W1746819321","https://openalex.org/W1987326241","https://openalex.org/W1999874108","https://openalex.org/W2001050921","https://openalex.org/W2021151961","https://openalex.org/W2035934535","https://openalex.org/W2037897789","https://openalex.org/W2054716580","https://openalex.org/W2056894129","https://openalex.org/W2084799336","https://openalex.org/W2099618002","https://openalex.org/W2101445408","https://openalex.org/W2104602264","https://openalex.org/W2105715011","https://openalex.org/W2109038907","https://openalex.org/W2113033979","https://openalex.org/W2115714256","https://openalex.org/W2119015791","https://openalex.org/W2120327309","https://openalex.org/W2121863487","https://openalex.org/W2142831953","https://openalex.org/W2153672931","https://openalex.org/W2156974606","https://openalex.org/W2168490009","https://openalex.org/W2169430966","https://openalex.org/W2171079152","https://openalex.org/W2231198303","https://openalex.org/W2246008130","https://openalex.org/W2250245054","https://openalex.org/W2250681874","https://openalex.org/W2312609093","https://openalex.org/W2401150877","https://openalex.org/W2586680856","https://openalex.org/W3158638686","https://openalex.org/W4211049957","https://openalex.org/W4214717370","https://openalex.org/W4285719527"],"related_works":["https://openalex.org/W643038845","https://openalex.org/W2143761673","https://openalex.org/W2521924929","https://openalex.org/W2392978157","https://openalex.org/W2364780071","https://openalex.org/W3212815067","https://openalex.org/W4313245278","https://openalex.org/W2284468552","https://openalex.org/W655164699","https://openalex.org/W1598197230"],"abstract_inverted_index":{"We":[0,97,111],"use":[1],"single-agent":[2,125],"and":[3,51,75,90,94,127],"multi-agent":[4],"Reinforcement":[5],"Learning":[6],"(RL)":[7],"for":[8,30,109],"learning":[9,73,130],"dialogue":[10,117],"policies":[11,118],"in":[12],"a":[13,120],"resource":[14],"allocation":[15],"negotiation":[16],"scenario.":[17],"Two":[18],"agents":[19],"learn":[20,40],"concurrently":[21],"by":[22],"interacting":[23],"with":[24],"each":[25],"other":[26],"without":[27],"any":[28],"need":[29],"simulated":[31],"users":[32],"(SUs)":[33],"to":[34,39,86,123],"train":[35],"against":[36],"or":[37,53,129],"corpora":[38],"from.":[41],"In":[42],"particular,":[43],"we":[44],"compare":[45],"the":[46,61,67,72,76],"Qlearning,":[47],"Policy":[48,56],"Hill-Climbing":[49,57],"(PHC)":[50],"Win":[52],"Learn":[54],"Fast":[55],"(PHC-WoLF)":[58],"algorithms,":[59],"varying":[60],"scenario":[62],"complexity":[63],"(state":[64],"space":[65],"size),":[66],"number":[68],"of":[69,116],"training":[70],"episodes,":[71],"rate,":[74],"exploration":[77,105],"rate.":[78],"Our":[79],"results":[80],"show":[81,99],"that":[82,100,113],"generally":[83],"Q-learning":[84],"fails":[85],"converge":[87,93],"whereas":[88],"PHC":[89],"PHC-WoLF":[91],"always":[92],"perform":[95],"similarly.":[96],"also":[98],"very":[101],"high":[102],"gradually":[103],"decreasing":[104],"rates":[106],"are":[107],"required":[108],"convergence.":[110],"conclude":[112],"multiagent":[114],"RL":[115,126],"is":[119],"promising":[121],"alternative":[122],"using":[124],"SUs":[128],"directly":[131],"from":[132],"corpora.":[133]},"counts_by_year":[{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":7},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":4},{"year":2016,"cited_by_count":3},{"year":2015,"cited_by_count":6}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
