{"id":"https://openalex.org/W4385567488","doi":"https://doi.org/10.1145/3580305.3599778","title":"Balancing Approach for Causal Inference at Scale","display_name":"Balancing Approach for Causal Inference at Scale","publication_year":2023,"publication_date":"2023-08-04","ids":{"openalex":"https://openalex.org/W4385567488","doi":"https://doi.org/10.1145/3580305.3599778"},"language":"en","primary_location":{"id":"doi:10.1145/3580305.3599778","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3580305.3599778","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103093612","display_name":"S. Y. Lin","orcid":"https://orcid.org/0009-0006-8062-5757"},"institutions":[{"id":"https://openalex.org/I4210142583","display_name":"Snap (United States)","ror":"https://ror.org/04dgkhg68","country_code":"US","type":"company","lineage":["https://openalex.org/I4210142583"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Sicheng Lin","raw_affiliation_strings":["Snap Inc., Santa Monica, CA, USA"],"affiliations":[{"raw_affiliation_string":"Snap Inc., Santa Monica, CA, USA","institution_ids":["https://openalex.org/I4210142583"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102901230","display_name":"Meng Xu","orcid":"https://orcid.org/0009-0002-3655-7540"},"institutions":[{"id":"https://openalex.org/I4210142583","display_name":"Snap (United States)","ror":"https://ror.org/04dgkhg68","country_code":"US","type":"company","lineage":["https://openalex.org/I4210142583"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Meng Xu","raw_affiliation_strings":["Snap Inc., Santa Monica, CA, USA"],"affiliations":[{"raw_affiliation_string":"Snap Inc., Santa Monica, CA, USA","institution_ids":["https://openalex.org/I4210142583"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101695397","display_name":"Zhang Xi","orcid":"https://orcid.org/0009-0004-1577-1016"},"institutions":[{"id":"https://openalex.org/I4210142583","display_name":"Snap (United States)","ror":"https://ror.org/04dgkhg68","country_code":"US","type":"company","lineage":["https://openalex.org/I4210142583"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xi Zhang","raw_affiliation_strings":["Snap Inc., Santa Monica, CA, USA"],"affiliations":[{"raw_affiliation_string":"Snap Inc., Santa Monica, CA, USA","institution_ids":["https://openalex.org/I4210142583"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015675683","display_name":"Shih-Kang Chao","orcid":"https://orcid.org/0000-0002-2303-8605"},"institutions":[{"id":"https://openalex.org/I76835614","display_name":"University of Missouri","ror":"https://ror.org/02ymw8z06","country_code":"US","type":"education","lineage":["https://openalex.org/I76835614"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shih-Kang Chao","raw_affiliation_strings":["University of Missouri, Columbia, MO, USA"],"affiliations":[{"raw_affiliation_string":"University of Missouri, Columbia, MO, USA","institution_ids":["https://openalex.org/I76835614"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083330656","display_name":"Y. K. Huang","orcid":"https://orcid.org/0000-0003-1925-3588"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ying-Kai Huang","raw_affiliation_strings":["Realtor.com, Santa Clara, CA, USA"],"affiliations":[{"raw_affiliation_string":"Realtor.com, Santa Clara, CA, USA","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101666398","display_name":"Xiaolin Shi","orcid":"https://orcid.org/0000-0002-3705-1552"},"institutions":[{"id":"https://openalex.org/I4210142583","display_name":"Snap (United States)","ror":"https://ror.org/04dgkhg68","country_code":"US","type":"company","lineage":["https://openalex.org/I4210142583"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xiaolin Shi","raw_affiliation_strings":["Snap Inc., Santa Monica, CA, USA"],"affiliations":[{"raw_affiliation_string":"Snap Inc., Santa Monica, CA, USA","institution_ids":["https://openalex.org/I4210142583"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5103093612"],"corresponding_institution_ids":["https://openalex.org/I4210142583"],"apc_list":null,"apc_paid":null,"fwci":1.3895,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.82438662,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"4485","last_page":"4496"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10845","display_name":"Advanced Causal Inference Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/2613","display_name":"Statistics and Probability"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10845","display_name":"Advanced Causal Inference Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/2613","display_name":"Statistics and Probability"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10136","display_name":"Statistical Methods and Inference","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/2613","display_name":"Statistics and Probability"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11235","display_name":"Statistical Methods in Clinical Trials","score":0.9868999719619751,"subfield":{"id":"https://openalex.org/subfields/2613","display_name":"Statistics and Probability"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7796498537063599},{"id":"https://openalex.org/keywords/covariate","display_name":"Covariate","score":0.676381528377533},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6416966915130615},{"id":"https://openalex.org/keywords/causal-inference","display_name":"Causal inference","score":0.6366240978240967},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5347280502319336},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.4391508996486664},{"id":"https://openalex.org/keywords/load-balancing","display_name":"Load balancing (electrical power)","score":0.42699021100997925},{"id":"https://openalex.org/keywords/correctness","display_name":"Correctness","score":0.4169802665710449},{"id":"https://openalex.org/keywords/randomized-experiment","display_name":"Randomized experiment","score":0.4141896963119507},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.38931548595428467},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3296456038951874},{"id":"https://openalex.org/keywords/econometrics","display_name":"Econometrics","score":0.25866806507110596},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.24092823266983032},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.1973899006843567},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.17697396874427795},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1090276837348938},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.10027053952217102}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7796498537063599},{"id":"https://openalex.org/C119043178","wikidata":"https://www.wikidata.org/wiki/Q320723","display_name":"Covariate","level":2,"score":0.676381528377533},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6416966915130615},{"id":"https://openalex.org/C158600405","wikidata":"https://www.wikidata.org/wiki/Q5054566","display_name":"Causal inference","level":2,"score":0.6366240978240967},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5347280502319336},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.4391508996486664},{"id":"https://openalex.org/C138959212","wikidata":"https://www.wikidata.org/wiki/Q1806783","display_name":"Load balancing (electrical power)","level":3,"score":0.42699021100997925},{"id":"https://openalex.org/C55439883","wikidata":"https://www.wikidata.org/wiki/Q360812","display_name":"Correctness","level":2,"score":0.4169802665710449},{"id":"https://openalex.org/C155108698","wikidata":"https://www.wikidata.org/wiki/Q1231081","display_name":"Randomized experiment","level":2,"score":0.4141896963119507},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.38931548595428467},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3296456038951874},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.25866806507110596},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.24092823266983032},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.1973899006843567},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.17697396874427795},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1090276837348938},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.10027053952217102},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C187691185","wikidata":"https://www.wikidata.org/wiki/Q2020720","display_name":"Grid","level":2,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3580305.3599778","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3580305.3599778","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure","score":0.4300000071525574}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":41,"referenced_works":["https://openalex.org/W1601608895","https://openalex.org/W1980287119","https://openalex.org/W2010505816","https://openalex.org/W2014373672","https://openalex.org/W2028040032","https://openalex.org/W2039811614","https://openalex.org/W2078639378","https://openalex.org/W2120817734","https://openalex.org/W2132324013","https://openalex.org/W2143416241","https://openalex.org/W2150291618","https://openalex.org/W2155163959","https://openalex.org/W2168639902","https://openalex.org/W2171443468","https://openalex.org/W2176874128","https://openalex.org/W2286797211","https://openalex.org/W2299232272","https://openalex.org/W2314734263","https://openalex.org/W2530751590","https://openalex.org/W2553763938","https://openalex.org/W2597503617","https://openalex.org/W2952127798","https://openalex.org/W2963709384","https://openalex.org/W3083648019","https://openalex.org/W3105979797","https://openalex.org/W3121065712","https://openalex.org/W3122193054","https://openalex.org/W3122781290","https://openalex.org/W3122812581","https://openalex.org/W3124410553","https://openalex.org/W3125057276","https://openalex.org/W3125958392","https://openalex.org/W3160659585","https://openalex.org/W3161326545","https://openalex.org/W3169407454","https://openalex.org/W4206633947","https://openalex.org/W4234712938","https://openalex.org/W4239728164","https://openalex.org/W4287808493","https://openalex.org/W4292363360","https://openalex.org/W4302779188"],"related_works":["https://openalex.org/W2905625059","https://openalex.org/W4389471064","https://openalex.org/W2119346805","https://openalex.org/W4322726883","https://openalex.org/W3005312434","https://openalex.org/W3199347757","https://openalex.org/W4313559754","https://openalex.org/W1730782591","https://openalex.org/W4388998033","https://openalex.org/W2510789027"],"abstract_inverted_index":{"With":[0],"the":[1,53,81,98,146,175,185,197],"modern":[2],"software":[3],"and":[4,65,100,112,121,130,160,172,205],"online":[5],"platforms":[6],"to":[7,59,83,155],"collect":[8],"massive":[9],"amount":[10],"of":[11,18,148,187],"data,":[12],"there":[13],"is":[14,29],"an":[15,207],"increasing":[16],"demand":[17],"applying":[19,188],"causal":[20,45,176,211],"inference":[21],"methods":[22,33,51,86],"at":[23,151,214],"large":[24,88,192],"scale":[25,89,193],"when":[26],"randomized":[27],"experimentation":[28],"not":[30],"viable.":[31],"Weighting":[32],"that":[34,165],"directly":[35],"incorporate":[36],"covariate":[37,71],"balancing":[38,116,119,149,189,198],"have":[39,126],"recently":[40],"gained":[41],"popularity":[42],"for":[43,77,114,210],"estimating":[44],"effects":[46],"in":[47,91,104,135,174],"observational":[48],"studies.":[49],"These":[50],"reduce":[52],"manual":[54],"efforts":[55],"required":[56],"by":[57],"researchers":[58],"iterate":[60],"between":[61],"propensity":[62],"score":[63],"modeling":[64],"balance":[66,72],"checking":[67],"until":[68],"a":[69,201],"satisfied":[70],"result.":[73],"However,":[74],"conventional":[75],"solvers":[76,125],"determining":[78],"weights":[79],"lack":[80],"scalability":[82],"apply":[84],"such":[85,139],"on":[87,191],"datasets":[90],"companies":[92],"like":[93],"Snap":[94,215],"Inc.":[95,216],"To":[96],"address":[97],"limitations":[99],"improve":[101],"computational":[102],"efficiency,":[103],"this":[105],"paper":[106],"we":[107],"present":[108],"scalable":[109],"algorithms,":[110],"DistEB":[111],"DistMS,":[113],"two":[115],"approaches:":[117],"entropy":[118],"[14]":[120],"MicroSynth":[122],"[33].":[123],"The":[124,182],"linear":[127],"time":[128],"complexity":[129],"can":[131],"be":[132],"conveniently":[133],"implemented":[134],"distributed":[136],"computing":[137],"frameworks":[138],"as":[140],"Spark,":[141],"Hive,":[142],"etc.":[143],"We":[144,163,195],"study":[145],"properties":[147],"approaches":[150,190],"different":[152],"scales":[153],"up":[154],"1":[156],"million":[157],"treated":[158],"units":[159],"487":[161],"covariates.":[162],"find":[164],"with":[166,200],"larger":[167],"sample":[168],"size,":[169],"both":[170],"bias":[171],"variance":[173],"effect":[177],"estimation":[178,213],"are":[179],"significantly":[180],"reduced.":[181],"results":[183],"emphasize":[184],"importance":[186],"datasets.":[194],"combine":[196],"approach":[199],"synthetic":[202],"control":[203],"framework":[204],"deploy":[206],"end-to-end":[208],"system":[209],"impact":[212]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
