{"id":"https://openalex.org/W2740396279","doi":"https://doi.org/10.24963/ijcai.2017/186","title":"Transfer Learning in Multi-Armed Bandits: A Causal Approach","display_name":"Transfer Learning in Multi-Armed Bandits: A Causal Approach","publication_year":2017,"publication_date":"2017-07-28","ids":{"openalex":"https://openalex.org/W2740396279","doi":"https://doi.org/10.24963/ijcai.2017/186","mag":"2740396279"},"language":"en","primary_location":{"id":"doi:10.24963/ijcai.2017/186","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2017/186","pdf_url":"https://www.ijcai.org/proceedings/2017/0186.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Twenty-Sixth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.ijcai.org/proceedings/2017/0186.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5081473407","display_name":"Junzhe Zhang","orcid":"https://orcid.org/0000-0003-0215-7514"},"institutions":[{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Junzhe Zhang","raw_affiliation_strings":["Purdue University, West Lafayette, IN"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Purdue University, West Lafayette, IN","institution_ids":["https://openalex.org/I219193219"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5039620960","display_name":"Elias Bareinboim","orcid":null},"institutions":[{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Elias Bareinboim","raw_affiliation_strings":["Purdue University, West Lafayette, IN"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Purdue University, West Lafayette, IN","institution_ids":["https://openalex.org/I219193219"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5081473407"],"corresponding_institution_ids":["https://openalex.org/I219193219"],"apc_list":null,"apc_paid":null,"fwci":3.2261,"has_fulltext":false,"cited_by_count":38,"citation_normalized_percentile":{"value":0.92082065,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1340","last_page":"1346"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9954000115394592,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7702432870864868},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6589016318321228},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6495675444602966},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5872179269790649},{"id":"https://openalex.org/keywords/causal-inference","display_name":"Causal inference","score":0.5741441249847412},{"id":"https://openalex.org/keywords/transfer-of-learning","display_name":"Transfer of learning","score":0.5264506936073303},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5232181549072266},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.472858190536499},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.4414336085319519},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.09727659821510315}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7702432870864868},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6589016318321228},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6495675444602966},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5872179269790649},{"id":"https://openalex.org/C158600405","wikidata":"https://www.wikidata.org/wiki/Q5054566","display_name":"Causal inference","level":2,"score":0.5741441249847412},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.5264506936073303},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5232181549072266},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.472858190536499},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.4414336085319519},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.09727659821510315},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.24963/ijcai.2017/186","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2017/186","pdf_url":"https://www.ijcai.org/proceedings/2017/0186.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Twenty-Sixth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.24963/ijcai.2017/186","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2017/186","pdf_url":"https://www.ijcai.org/proceedings/2017/0186.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Twenty-Sixth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2740396279.pdf","grobid_xml":"https://content.openalex.org/works/W2740396279.grobid-xml"},"referenced_works_count":22,"referenced_works":["https://openalex.org/W158722652","https://openalex.org/W1538924311","https://openalex.org/W1582948757","https://openalex.org/W1673419196","https://openalex.org/W1986014385","https://openalex.org/W1992816730","https://openalex.org/W2097381042","https://openalex.org/W2098723043","https://openalex.org/W2099471337","https://openalex.org/W2108738385","https://openalex.org/W2113065326","https://openalex.org/W2119850747","https://openalex.org/W2161252410","https://openalex.org/W2184746314","https://openalex.org/W2466989778","https://openalex.org/W2616287117","https://openalex.org/W2735260544","https://openalex.org/W2949186496","https://openalex.org/W3100329718","https://openalex.org/W4234228486","https://openalex.org/W4248145776","https://openalex.org/W4299515571"],"related_works":["https://openalex.org/W4306904969","https://openalex.org/W2138720691","https://openalex.org/W4362501864","https://openalex.org/W4380318855","https://openalex.org/W3084456289","https://openalex.org/W2024136090","https://openalex.org/W4391331176","https://openalex.org/W2031695474","https://openalex.org/W2586732548","https://openalex.org/W2964765435"],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1,15,36,88,141],"(RL)":[2],"agents":[3,32,126],"have":[4],"been":[5,80],"deployed":[6],"in":[7,22,72,83,89,112,127,166],"complex":[8],"environments":[9],"where":[10,129],"interactions":[11,28],"are":[12],"costly,":[13],"and":[14,56,106,139,191,206],"is":[16,25,212],"usually":[17],"slow.":[18],"One":[19],"prominent":[20],"task":[21],"these":[23,164,200],"settings":[24,128],"to":[26,33,45,102,173],"reuse":[27],"performed":[29],"by":[30,135],"other":[31],"accelerate":[34],"the":[35,47,60,73,84,119,155,175,217],"process.":[37],"Causal":[38],"inference":[39,77,98],"provides":[40],"a":[41,52,100,104,167],"family":[42],"of":[43,49,54,66,86,110,121,194],"methods":[44,221],"infer":[46],"effects":[48,131],"actions":[50],"from":[51],"combination":[53],"data":[55],"qualitative":[57],"assumptions":[58],"about":[59],"underlying":[61],"environment.":[62],"Despite":[63],"its":[64],"success":[65],"transferring":[67,122],"invariant":[68],"knowledge":[69,111,123],"across":[70,124],"domains":[71],"empirical":[74],"sciences,":[75],"causal":[76,97,130],"has":[78],"not":[79],"fully":[81],"realized":[82],"context":[85],"transfer":[87,109],"interactive":[90],"domains.":[91],"In":[92,115],"this":[93],"paper,":[94],"we":[95,117,203],"use":[96],"as":[99,172],"basis":[101],"support":[103],"principled":[105],"more":[107,178,214],"robust":[108],"RL":[113],"settings.":[114],"particular,":[116],"tackle":[118],"problem":[120],"bandit":[125],"cannot":[132],"be":[133],"identified":[134],"do-calculus":[136],"[Pearl,":[137],"2000]":[138],"standard":[140],"techniques.":[142],"Our":[143],"new":[144],"identification":[145],"strategy":[146,186,211],"combines":[147],"two":[148],"steps":[149],"--":[150],"first,":[151],"deriving":[152],"bounds":[153,165],"over":[154],"arm\u2019s":[156],"distribution":[157],"based":[158],"on":[159],"structural":[160],"knowledge;":[161],"second,":[162],"incorporating":[163],"dynamic":[168],"allocation":[169],"procedure":[170],"so":[171],"guide":[174],"search":[176],"towards":[177],"promising":[179],"actions.":[180],"We":[181],"formally":[182],"prove":[183],"that":[184,209],"our":[185,210],"dominates":[187],"previously":[188],"known":[189],"algorithms":[190],"achieves":[192],"orders":[193],"magnitude":[195],"faster":[196],"convergence":[197],"rates":[198],"than":[199,216],"algorithms.":[201],"Finally,":[202],"perform":[204],"simulations":[205],"empirically":[207],"demonstrate":[208],"consistently":[213],"efficient":[215],"current":[218],"(non-causal)":[219],"state-of-the-art":[220]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":9},{"year":2020,"cited_by_count":6},{"year":2019,"cited_by_count":3},{"year":2018,"cited_by_count":3}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
