{"id":"https://openalex.org/W4283367762","doi":"https://doi.org/10.14778/3523210.3523223","title":"Selective data acquisition in the wild for model charging","display_name":"Selective data acquisition in the wild for model charging","publication_year":2022,"publication_date":"2022-03-01","ids":{"openalex":"https://openalex.org/W4283367762","doi":"https://doi.org/10.14778/3523210.3523223"},"language":"en","primary_location":{"id":"doi:10.14778/3523210.3523223","is_oa":false,"landing_page_url":"https://doi.org/10.14778/3523210.3523223","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101797040","display_name":"Chengliang Chai","orcid":"https://orcid.org/0000-0001-8080-5594"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Chengliang Chai","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100659171","display_name":"Jiabin Liu","orcid":"https://orcid.org/0000-0001-6914-8941"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiabin Liu","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101824160","display_name":"Nan Tang","orcid":"https://orcid.org/0000-0003-2832-0295"},"institutions":[{"id":"https://openalex.org/I4210138380","display_name":"Qatar Cardiovascular Research Center","ror":"https://ror.org/038vyt185","country_code":"QA","type":"healthcare","lineage":["https://openalex.org/I4210138380"]}],"countries":["QA"],"is_corresponding":false,"raw_author_name":"Nan Tang","raw_affiliation_strings":["QCRI, Doha, Qatar"],"affiliations":[{"raw_affiliation_string":"QCRI, Doha, Qatar","institution_ids":["https://openalex.org/I4210138380"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100451576","display_name":"Guoliang Li","orcid":"https://orcid.org/0000-0002-1398-0621"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guoliang Li","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100614732","display_name":"Yuyu Luo","orcid":"https://orcid.org/0000-0001-9530-3327"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuyu Luo","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5101797040"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":6.0718,"has_fulltext":false,"cited_by_count":45,"citation_normalized_percentile":{"value":0.96774243,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":"15","issue":"7","first_page":"1466","last_page":"1478"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.7705093622207642},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7560949325561523},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.6198724508285522},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5651724338531494},{"id":"https://openalex.org/keywords/schema","display_name":"Schema (genetic algorithms)","score":0.5389460325241089},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5149251222610474},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.48425671458244324},{"id":"https://openalex.org/keywords/data-set","display_name":"Data set","score":0.4557490050792694},{"id":"https://openalex.org/keywords/data-warehouse","display_name":"Data warehouse","score":0.4541451930999756},{"id":"https://openalex.org/keywords/data-point","display_name":"Data point","score":0.43136849999427795},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3245202898979187},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2809021770954132},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.08423355221748352}],"concepts":[{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.7705093622207642},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7560949325561523},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.6198724508285522},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5651724338531494},{"id":"https://openalex.org/C52146309","wikidata":"https://www.wikidata.org/wiki/Q7431116","display_name":"Schema (genetic algorithms)","level":2,"score":0.5389460325241089},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5149251222610474},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.48425671458244324},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.4557490050792694},{"id":"https://openalex.org/C135572916","wikidata":"https://www.wikidata.org/wiki/Q193351","display_name":"Data warehouse","level":2,"score":0.4541451930999756},{"id":"https://openalex.org/C21080849","wikidata":"https://www.wikidata.org/wiki/Q13611879","display_name":"Data point","level":2,"score":0.43136849999427795},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3245202898979187},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2809021770954132},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.08423355221748352},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.14778/3523210.3523223","is_oa":false,"landing_page_url":"https://doi.org/10.14778/3523210.3523223","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Partnerships for the goals","score":0.4000000059604645,"id":"https://metadata.un.org/sdg/17"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":35,"referenced_works":["https://openalex.org/W1516061453","https://openalex.org/W1673310716","https://openalex.org/W1860787181","https://openalex.org/W1966827292","https://openalex.org/W1975672287","https://openalex.org/W2015245929","https://openalex.org/W2022686119","https://openalex.org/W2043772275","https://openalex.org/W2049633694","https://openalex.org/W2067566391","https://openalex.org/W2123981539","https://openalex.org/W2124509324","https://openalex.org/W2145339207","https://openalex.org/W2257979135","https://openalex.org/W2428834396","https://openalex.org/W2740924709","https://openalex.org/W2798664493","https://openalex.org/W2808345493","https://openalex.org/W2975583675","https://openalex.org/W3005666391","https://openalex.org/W3014074327","https://openalex.org/W3030674916","https://openalex.org/W3032848711","https://openalex.org/W3034251946","https://openalex.org/W3037852608","https://openalex.org/W3161956575","https://openalex.org/W3171842281","https://openalex.org/W3174906424","https://openalex.org/W3196436244","https://openalex.org/W3197182341","https://openalex.org/W4235216760","https://openalex.org/W4288335875","https://openalex.org/W6631277298","https://openalex.org/W6737551659","https://openalex.org/W6767887823"],"related_works":["https://openalex.org/W1657880117","https://openalex.org/W2595172197","https://openalex.org/W2127970246","https://openalex.org/W2084856301","https://openalex.org/W1001352512","https://openalex.org/W4382618745","https://openalex.org/W2885125400","https://openalex.org/W2522980826","https://openalex.org/W2745792676","https://openalex.org/W4206278474"],"abstract_inverted_index":{"The":[0,92,138],"lack":[1],"of":[2,87,114,271],"sufficient":[3],"labeled":[4,66],"data":[5,30,45,51,54,56,67,71,78,132,145,165,188,199,213],"is":[6,63,95,141,171],"a":[7,25,40,112,179,218,248,254],"key":[8],"bottleneck":[9],"for":[10,35,175],"practitioners":[11],"in":[12,32,46,72,111],"many":[13],"real-world":[14],"supervised":[15,41],"machine":[16],"learning":[17,234,259],"(ML)":[18],"tasks.":[19],"In":[20],"this":[21],"paper,":[22],"we":[23,162,177],"study":[24],"new":[26],"problem,":[27],"namely":[28],"selective":[29],"acquisition":[31],"the":[33,47,61,70,73,82,221,225,230,236,244,269],"wild":[34,48,74],"model":[36],"charging":[37],":":[38],"given":[39],"ML":[42,83],"task":[43],"and":[44,58,125,227,253,265],"(e.g.,":[49],"enterprise":[50],"warehouses,":[52],"online":[53],"repositories,":[55],"markets,":[57],"so":[59],"on),":[60],"problem":[62],"to":[64,96,142,210],"select":[65,143],"points":[68,133,146,189,200,214],"from":[69,122,147,190,201,220,235],"as":[75],"additional":[76],"train":[77],"that":[79,194],"can":[80,136],"help":[81],"task.":[84],"It":[85,184,204],"consists":[86],"two":[88],"steps":[89],"(Fig.":[90],"1).":[91],"first":[93,185],"step":[94,140,160,173],"discover":[97],"relevant":[98],"datasets":[99,120,150,192,267],"(":[100,215,238],"e.g.":[101],",":[102,217,240],"tables":[103],"with":[104],"similar":[105,198],"relational":[106,264],"schema),":[107],"which":[108,144,176,208],"will":[109],"result":[110],"set":[113],"candidate":[115,119,149,191],"datasets.":[116],"Because":[117],"these":[118,148],"come":[121],"different":[123,128,202],"sources":[124],"may":[126],"follow":[127],"distributions,":[129],"not":[130],"all":[131,187],"they":[134],"contain":[135],"help.":[137],"second":[139],"should":[151],"be":[152],"used.":[153],"We":[154,246],"build":[155],"an":[156],"end-to-end":[157],"solution.":[158,260],"For":[159],"1,":[161],"piggyback":[163],"off-the-shelf":[164],"discovery":[166],"tools.":[167],"Technically,":[168],"our":[169,272],"focus":[170],"on":[172,243],"2,":[174],"propose":[178,247],"solution":[180,252],"framework":[181],"called":[182],"AutoData.":[183],"clusters":[186],"such":[193],"each":[195],"cluster":[196,209],"contains":[197],"sources.":[203],"then":[205,228],"iteratively":[206],"picks":[207],"use,":[211],"samples":[212],"i.e.":[216,239],"mini-batch)":[219],"picked":[222],"cluster,":[223],"evaluates":[224],"mini-batch,":[226],"revises":[229],"search":[231],"criteria":[232],"by":[233],"feedback":[237],"reward)":[241],"based":[242,251],"evaluation.":[245],"multi-armed":[249],"bandit":[250],"Deep":[255],"Q":[256],"Networks-based":[257],"reinforcement":[258],"Experiments":[261],"using":[262],"both":[263],"image":[266],"show":[268],"effectiveness":[270],"solutions.":[273]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":10},{"year":2024,"cited_by_count":16},{"year":2023,"cited_by_count":7},{"year":2022,"cited_by_count":11}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
