{"id":"https://openalex.org/W2583568739","doi":"https://doi.org/10.1109/bigdata.2016.7840637","title":"Sampling-based distributed Kernel mean matching using spark","display_name":"Sampling-based distributed Kernel mean matching using spark","publication_year":2016,"publication_date":"2016-12-01","ids":{"openalex":"https://openalex.org/W2583568739","doi":"https://doi.org/10.1109/bigdata.2016.7840637","mag":"2583568739"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata.2016.7840637","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2016.7840637","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5016050605","display_name":"Ahsanul Haque","orcid":null},"institutions":[{"id":"https://openalex.org/I162577319","display_name":"The University of Texas at Dallas","ror":"https://ror.org/049emcs32","country_code":"US","type":"education","lineage":["https://openalex.org/I162577319"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ahsanul Haque","raw_affiliation_strings":["Department of Computer Science, The University of Texas at Dallas, Richardson, TX, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science, The University of Texas at Dallas, Richardson, TX, USA","institution_ids":["https://openalex.org/I162577319"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012907875","display_name":"Zhuoyi Wang","orcid":"https://orcid.org/0000-0002-1058-2791"},"institutions":[{"id":"https://openalex.org/I162577319","display_name":"The University of Texas at Dallas","ror":"https://ror.org/049emcs32","country_code":"US","type":"education","lineage":["https://openalex.org/I162577319"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhuoyi Wang","raw_affiliation_strings":["Department of Computer Science, The University of Texas at Dallas, Richardson, TX, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science, The University of Texas at Dallas, Richardson, TX, USA","institution_ids":["https://openalex.org/I162577319"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056179290","display_name":"Swarup Chandra","orcid":null},"institutions":[{"id":"https://openalex.org/I162577319","display_name":"The University of Texas at Dallas","ror":"https://ror.org/049emcs32","country_code":"US","type":"education","lineage":["https://openalex.org/I162577319"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Swarup Chandra","raw_affiliation_strings":["Department of Computer Science, The University of Texas at Dallas, Richardson, TX, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science, The University of Texas at Dallas, Richardson, TX, USA","institution_ids":["https://openalex.org/I162577319"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101103417","display_name":"Yupeng Gao","orcid":null},"institutions":[{"id":"https://openalex.org/I162577319","display_name":"The University of Texas at Dallas","ror":"https://ror.org/049emcs32","country_code":"US","type":"education","lineage":["https://openalex.org/I162577319"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yupeng Gao","raw_affiliation_strings":["Department of Computer Science, The University of Texas at Dallas, Richardson, TX, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science, The University of Texas at Dallas, Richardson, TX, USA","institution_ids":["https://openalex.org/I162577319"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005002693","display_name":"Latifur Khan","orcid":"https://orcid.org/0000-0002-9300-1576"},"institutions":[{"id":"https://openalex.org/I162577319","display_name":"The University of Texas at Dallas","ror":"https://ror.org/049emcs32","country_code":"US","type":"education","lineage":["https://openalex.org/I162577319"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Latifur Khan","raw_affiliation_strings":["Department of Computer Science, The University of Texas at Dallas, Richardson, TX, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science, The University of Texas at Dallas, Richardson, TX, USA","institution_ids":["https://openalex.org/I162577319"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5028089542","display_name":"Char\u0173 C. Aggarwal","orcid":"https://orcid.org/0000-0003-2579-7581"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Charu Aggarwal","raw_affiliation_strings":["IBM T. J. Watson Research Center, Yorktown, NY, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IBM T. J. Watson Research Center, Yorktown, NY, USA","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.4416,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.81225938,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":"10","issue":null,"first_page":"462","last_page":"471"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12205","display_name":"Time Series Analysis and Forecasting","score":0.9940999746322632,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/covariate","display_name":"Covariate","score":0.7674911022186279},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.7165214419364929},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7141799330711365},{"id":"https://openalex.org/keywords/spark","display_name":"SPARK (programming language)","score":0.6630709171295166},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.606438159942627},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.6023567318916321},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5794452428817749},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.5387910604476929},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5004785060882568},{"id":"https://openalex.org/keywords/kernel-density-estimation","display_name":"Kernel density estimation","score":0.42077505588531494},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.39968395233154297},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.2373112440109253},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1413441300392151}],"concepts":[{"id":"https://openalex.org/C119043178","wikidata":"https://www.wikidata.org/wiki/Q320723","display_name":"Covariate","level":2,"score":0.7674911022186279},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.7165214419364929},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7141799330711365},{"id":"https://openalex.org/C2781215313","wikidata":"https://www.wikidata.org/wiki/Q3493345","display_name":"SPARK (programming language)","level":2,"score":0.6630709171295166},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.606438159942627},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.6023567318916321},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5794452428817749},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.5387910604476929},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5004785060882568},{"id":"https://openalex.org/C71134354","wikidata":"https://www.wikidata.org/wiki/Q458825","display_name":"Kernel density estimation","level":3,"score":0.42077505588531494},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.39968395233154297},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.2373112440109253},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1413441300392151},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.0},{"id":"https://openalex.org/C185429906","wikidata":"https://www.wikidata.org/wiki/Q1130160","display_name":"Estimator","level":2,"score":0.0},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata.2016.7840637","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2016.7840637","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":42,"referenced_works":["https://openalex.org/W123476658","https://openalex.org/W189742998","https://openalex.org/W1904826605","https://openalex.org/W1966000522","https://openalex.org/W1969080239","https://openalex.org/W2032536435","https://openalex.org/W2067707835","https://openalex.org/W2102689555","https://openalex.org/W2103851188","https://openalex.org/W2107250100","https://openalex.org/W2111272908","https://openalex.org/W2111362445","https://openalex.org/W2112483442","https://openalex.org/W2117897510","https://openalex.org/W2118585731","https://openalex.org/W2135335717","https://openalex.org/W2165660393","https://openalex.org/W2189465200","https://openalex.org/W2249161032","https://openalex.org/W2267835966","https://openalex.org/W2434851943","https://openalex.org/W2470412537","https://openalex.org/W2532640750","https://openalex.org/W3003253354","https://openalex.org/W3022547535","https://openalex.org/W3120740533","https://openalex.org/W4244777963","https://openalex.org/W4381059462","https://openalex.org/W6604957493","https://openalex.org/W6675410418","https://openalex.org/W6675547039","https://openalex.org/W6676141320","https://openalex.org/W6676840641","https://openalex.org/W6676870182","https://openalex.org/W6677069268","https://openalex.org/W6677656871","https://openalex.org/W6680192438","https://openalex.org/W6684485356","https://openalex.org/W6687322159","https://openalex.org/W6693772185","https://openalex.org/W6719935260","https://openalex.org/W6788247690"],"related_works":["https://openalex.org/W2985746494","https://openalex.org/W4206042385","https://openalex.org/W2511384863","https://openalex.org/W2080773131","https://openalex.org/W2096089271","https://openalex.org/W2923628599","https://openalex.org/W2014100433","https://openalex.org/W2051519658","https://openalex.org/W4308507533","https://openalex.org/W2407107767"],"abstract_inverted_index":{"Limited":[0],"access":[1],"to":[2,97,108,159],"supervised":[3],"information":[4],"may":[5],"forge":[6],"scenarios":[7],"in":[8,51,81],"real-world":[9],"data":[10,17,35,44,171],"mining":[11,36],"applications,":[12],"where":[13],"training":[14,41,72,85,133,170],"and":[15,42,125],"test":[16,43],"are":[18],"interconnected":[19],"by":[20,70],"a":[21,54,62,105,128,174],"covariate":[22,32,55,68],"shift,":[23],"i.e.,":[24],"having":[25],"equal":[26],"class":[27],"conditional":[28],"distribution":[29],"with":[30,177],"unequal":[31],"distribution.":[33],"Traditional":[34],"techniques":[37],"assume":[38],"that":[39,66,119,145,166],"both":[40],"represent":[45],"an":[46],"identical":[47],"distribution,":[48],"therefore":[49,126],"suffer":[50],"presence":[52],"of":[53,84,114,169,183],"shift.":[56],"Kernel":[57],"Mean":[58],"Matching":[59],"(KMM)":[60],"is":[61,88,122],"well":[63],"known":[64],"approach":[65,121,148],"addresses":[67],"shift":[69],"weighing":[71],"instances":[73],"appropriately.":[74],"However,":[75],"it":[76,164],"has":[77],"time":[78,157,182],"complexity":[79],"cubic":[80],"the":[82,110,120,146,160,184],"size":[83,168],"data,":[86],"which":[87],"computationally":[89],"impractical":[90],"for":[91,131],"large":[92],"or":[93],"streaming":[94],"datasets":[95,143],"due":[96],"limited":[98,111],"scalability.":[99],"In":[100],"this":[101],"paper,":[102],"we":[103,117],"present":[104],"sampling-based":[106],"algorithm":[107,130],"address":[109],"scalability":[112],"problem":[113],"KMM.":[115],"Moreover,":[116,163],"show":[118,144],"highly":[123],"parallelizable,":[124],"propose":[127],"distributed":[129],"estimating":[132],"instance":[134],"weights":[135],"efficiently":[136],"using":[137],"Spark.":[138],"Experiment":[139],"results":[140,172],"on":[141,180],"benchmark":[142],"proposed":[147,185],"achieves":[149],"competitive":[150],"estimation":[151],"accuracy":[152,176],"within":[153],"much":[154],"lower":[155],"execution":[156,181],"compared":[158],"KMM":[161],"algorithm.":[162],"indicates":[165],"larger":[167],"into":[173],"higher":[175],"minimal":[178],"effect":[179],"approach.":[186]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
