{"id":"https://openalex.org/W2995509045","doi":"https://doi.org/10.24963/ijcai.2021/461","title":"Independence-aware Advantage Estimation","display_name":"Independence-aware Advantage Estimation","publication_year":2021,"publication_date":"2021-08-01","ids":{"openalex":"https://openalex.org/W2995509045","doi":"https://doi.org/10.24963/ijcai.2021/461","mag":"2995509045"},"language":"en","primary_location":{"id":"doi:10.24963/ijcai.2021/461","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2021/461","pdf_url":"https://www.ijcai.org/proceedings/2021/0461.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirtieth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.ijcai.org/proceedings/2021/0461.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5048462355","display_name":"Pushi Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Pushi Zhang","raw_affiliation_strings":["Tsinghua University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tsinghua University","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101858782","display_name":"Li Zhao","orcid":"https://orcid.org/0000-0001-6918-0204"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Li Zhao","raw_affiliation_strings":["Microsoft Research Asia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100731912","display_name":"Guoqing Liu","orcid":"https://orcid.org/0000-0003-4110-7616"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guoqing Liu","raw_affiliation_strings":["University of Science and Technology of China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101544241","display_name":"Jiang Bian","orcid":"https://orcid.org/0000-0002-9472-600X"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiang Bian","raw_affiliation_strings":["Microsoft Research Asia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044042138","display_name":"Minlie Huang","orcid":"https://orcid.org/0000-0001-7111-1849"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Minlie Huang","raw_affiliation_strings":["Tsinghua University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tsinghua University","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020025718","display_name":"Tao Qin","orcid":"https://orcid.org/0000-0002-9095-0776"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tao Qin","raw_affiliation_strings":["Microsoft Research Asia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101884287","display_name":"Tie\u2010Yan Liu","orcid":"https://orcid.org/0000-0002-0476-8020"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tie-Yan Liu","raw_affiliation_strings":["Microsoft Research Asia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia","institution_ids":["https://openalex.org/I4210113369"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.00421219,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"3349","last_page":"3355"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.9926999807357788,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11663","display_name":"Viral Infectious Diseases and Gene Expression in Insects","score":0.9742000102996826,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/variance","display_name":"Variance (accounting)","score":0.7684001922607422},{"id":"https://openalex.org/keywords/estimator","display_name":"Estimator","score":0.7323302030563354},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6863864660263062},{"id":"https://openalex.org/keywords/monte-carlo-method","display_name":"Monte Carlo method","score":0.6540966033935547},{"id":"https://openalex.org/keywords/independence","display_name":"Independence (probability theory)","score":0.6468302011489868},{"id":"https://openalex.org/keywords/control-variates","display_name":"Control variates","score":0.6015535593032837},{"id":"https://openalex.org/keywords/importance-sampling","display_name":"Importance sampling","score":0.5532361268997192},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.49268966913223267},{"id":"https://openalex.org/keywords/monte-carlo-integration","display_name":"Monte Carlo integration","score":0.4453222155570984},{"id":"https://openalex.org/keywords/bias-of-an-estimator","display_name":"Bias of an estimator","score":0.4246363639831543},{"id":"https://openalex.org/keywords/minimum-variance-unbiased-estimator","display_name":"Minimum-variance unbiased estimator","score":0.36712539196014404},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3570426106452942},{"id":"https://openalex.org/keywords/econometrics","display_name":"Econometrics","score":0.3409573435783386},{"id":"https://openalex.org/keywords/markov-chain-monte-carlo","display_name":"Markov chain Monte Carlo","score":0.31302833557128906},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.2677954435348511},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.24623817205429077},{"id":"https://openalex.org/keywords/hybrid-monte-carlo","display_name":"Hybrid Monte Carlo","score":0.13677066564559937}],"concepts":[{"id":"https://openalex.org/C196083921","wikidata":"https://www.wikidata.org/wiki/Q7915758","display_name":"Variance (accounting)","level":2,"score":0.7684001922607422},{"id":"https://openalex.org/C185429906","wikidata":"https://www.wikidata.org/wiki/Q1130160","display_name":"Estimator","level":2,"score":0.7323302030563354},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6863864660263062},{"id":"https://openalex.org/C19499675","wikidata":"https://www.wikidata.org/wiki/Q232207","display_name":"Monte Carlo method","level":2,"score":0.6540966033935547},{"id":"https://openalex.org/C35651441","wikidata":"https://www.wikidata.org/wiki/Q625303","display_name":"Independence (probability theory)","level":2,"score":0.6468302011489868},{"id":"https://openalex.org/C121683094","wikidata":"https://www.wikidata.org/wiki/Q3554721","display_name":"Control variates","level":5,"score":0.6015535593032837},{"id":"https://openalex.org/C52740198","wikidata":"https://www.wikidata.org/wiki/Q1539564","display_name":"Importance sampling","level":3,"score":0.5532361268997192},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.49268966913223267},{"id":"https://openalex.org/C132725507","wikidata":"https://www.wikidata.org/wiki/Q39879","display_name":"Monte Carlo integration","level":5,"score":0.4453222155570984},{"id":"https://openalex.org/C191393472","wikidata":"https://www.wikidata.org/wiki/Q15222032","display_name":"Bias of an estimator","level":4,"score":0.4246363639831543},{"id":"https://openalex.org/C165646398","wikidata":"https://www.wikidata.org/wiki/Q3755281","display_name":"Minimum-variance unbiased estimator","level":3,"score":0.36712539196014404},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3570426106452942},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.3409573435783386},{"id":"https://openalex.org/C111350023","wikidata":"https://www.wikidata.org/wiki/Q1191869","display_name":"Markov chain Monte Carlo","level":3,"score":0.31302833557128906},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.2677954435348511},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.24623817205429077},{"id":"https://openalex.org/C13153151","wikidata":"https://www.wikidata.org/wiki/Q1639846","display_name":"Hybrid Monte Carlo","level":4,"score":0.13677066564559937},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.0},{"id":"https://openalex.org/C121955636","wikidata":"https://www.wikidata.org/wiki/Q4116214","display_name":"Accounting","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.24963/ijcai.2021/461","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2021/461","pdf_url":"https://www.ijcai.org/proceedings/2021/0461.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirtieth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.24963/ijcai.2021/461","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2021/461","pdf_url":"https://www.ijcai.org/proceedings/2021/0461.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirtieth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2995509045.pdf","grobid_xml":"https://content.openalex.org/works/W2995509045.grobid-xml"},"referenced_works_count":20,"referenced_works":["https://openalex.org/W1191599655","https://openalex.org/W1771410628","https://openalex.org/W2136602922","https://openalex.org/W2155027007","https://openalex.org/W2569805627","https://openalex.org/W2625967765","https://openalex.org/W2736601468","https://openalex.org/W2786303200","https://openalex.org/W2787938642","https://openalex.org/W2951470703","https://openalex.org/W2952191563","https://openalex.org/W2963457007","https://openalex.org/W2963864421","https://openalex.org/W2964043796","https://openalex.org/W2970811133","https://openalex.org/W3168837055","https://openalex.org/W4287998216","https://openalex.org/W4289760659","https://openalex.org/W4293415974","https://openalex.org/W4298857966"],"related_works":["https://openalex.org/W2349547417","https://openalex.org/W4237435333","https://openalex.org/W4210503132","https://openalex.org/W2999390738","https://openalex.org/W2352602506","https://openalex.org/W3020567546","https://openalex.org/W1979154598","https://openalex.org/W1966798441","https://openalex.org/W2065756054","https://openalex.org/W4234882310"],"abstract_inverted_index":{"Most":[0],"of":[1,14,53,93],"existing":[2,106,131],"advantage":[3,55,73,132],"function":[4],"estimation":[5,118,133],"methods":[6,134],"in":[7,41,135],"reinforcement":[8],"learning":[9],"suffer":[10],"from":[11],"the":[12,21,32,51,54,59,80,91,94,99,117],"problem":[13],"high":[15,95],"variance,":[16],"which":[17,43],"scales":[18],"unfavorably":[19],"with":[20,75,105,130],"time":[22],"horizon.":[23],"To":[24,88],"address":[25],"this":[26],"challenge,":[27],"we":[28,102],"propose":[29],"to":[30,48,67],"identify":[31],"independence":[33,61],"property":[34,62],"between":[35],"current":[36],"action":[37],"and":[38],"future":[39],"states":[40],"environments,":[42],"can":[44,63],"be":[45,64],"further":[46,89],"leveraged":[47],"effectively":[49],"reduce":[50],"variance":[52,77,96],"estimation.":[56],"In":[57],"particular,":[58],"recognized":[60],"naturally":[65],"utilized":[66],"construct":[68],"a":[69,85,110],"novel":[70],"importance":[71],"sampling":[72],"estimator":[74,108],"close-to-zero":[76],"even":[78],"when":[79],"Monte-Carlo":[81,107],"return":[82],"signal":[83],"yields":[84],"large":[86],"variance.":[87,119],"remove":[90],"risk":[92],"introduced":[97],"by":[98,115],"new":[100],"estimator,":[101],"combine":[103],"it":[104],"via":[109],"reward":[111],"decomposition":[112],"model":[113],"learned":[114],"minimizing":[116],"Experiments":[120],"demonstrate":[121],"that":[122],"our":[123],"method":[124],"achieves":[125],"higher":[126],"sample":[127],"efficiency":[128],"compared":[129],"complex":[136],"environments.":[137]},"counts_by_year":[{"year":2019,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
