{"id":"https://openalex.org/W4312929313","doi":"https://doi.org/10.14778/3561261.3561267","title":"Coresets over multiple tables for feature-rich and data-efficient machine learning","display_name":"Coresets over multiple tables for feature-rich and data-efficient machine learning","publication_year":2022,"publication_date":"2022-09-01","ids":{"openalex":"https://openalex.org/W4312929313","doi":"https://doi.org/10.14778/3561261.3561267"},"language":"en","primary_location":{"id":"doi:10.14778/3561261.3561267","is_oa":false,"landing_page_url":"https://doi.org/10.14778/3561261.3561267","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100449263","display_name":"Jiayi Wang","orcid":"https://orcid.org/0000-0002-7785-3381"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jiayi Wang","raw_affiliation_strings":["Tsinghua University"],"affiliations":[{"raw_affiliation_string":"Tsinghua University","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101797040","display_name":"Chengliang Chai","orcid":"https://orcid.org/0000-0001-8080-5594"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chengliang Chai","raw_affiliation_strings":["Tsinghua University"],"affiliations":[{"raw_affiliation_string":"Tsinghua University","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101824160","display_name":"Nan Tang","orcid":"https://orcid.org/0000-0003-2832-0295"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nan Tang","raw_affiliation_strings":["QCRI"],"affiliations":[{"raw_affiliation_string":"QCRI","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100659171","display_name":"Jiabin Liu","orcid":"https://orcid.org/0000-0001-6914-8941"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiabin Liu","raw_affiliation_strings":["Tsinghua University"],"affiliations":[{"raw_affiliation_string":"Tsinghua University","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100451576","display_name":"Guoliang Li","orcid":"https://orcid.org/0000-0002-1398-0621"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guoliang Li","raw_affiliation_strings":["Tsinghua University"],"affiliations":[{"raw_affiliation_string":"Tsinghua University","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100449263"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":2.0391,"has_fulltext":false,"cited_by_count":21,"citation_normalized_percentile":{"value":0.88374523,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"16","issue":"1","first_page":"64","last_page":"76"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10057","display_name":"Face and Expression Recognition","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10057","display_name":"Face and Expression Recognition","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.994700014591217,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7621783018112183},{"id":"https://openalex.org/keywords/joins","display_name":"Joins","score":0.6585022211074829},{"id":"https://openalex.org/keywords/tuple","display_name":"Tuple","score":0.6480083465576172},{"id":"https://openalex.org/keywords/feature-selection","display_name":"Feature selection","score":0.6418782472610474},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.6310265064239502},{"id":"https://openalex.org/keywords/table","display_name":"Table (database)","score":0.5983543395996094},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5423697829246521},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.515268087387085},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5056071281433105},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.4900890588760376},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.41481226682662964},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.11298134922981262},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.09730011224746704}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7621783018112183},{"id":"https://openalex.org/C2778692605","wikidata":"https://www.wikidata.org/wiki/Q4041866","display_name":"Joins","level":2,"score":0.6585022211074829},{"id":"https://openalex.org/C118930307","wikidata":"https://www.wikidata.org/wiki/Q600590","display_name":"Tuple","level":2,"score":0.6480083465576172},{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.6418782472610474},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.6310265064239502},{"id":"https://openalex.org/C45235069","wikidata":"https://www.wikidata.org/wiki/Q278425","display_name":"Table (database)","level":2,"score":0.5983543395996094},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5423697829246521},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.515268087387085},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5056071281433105},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.4900890588760376},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.41481226682662964},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.11298134922981262},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.09730011224746704},{"id":"https://openalex.org/C118615104","wikidata":"https://www.wikidata.org/wiki/Q121416","display_name":"Discrete mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.14778/3561261.3561267","is_oa":false,"landing_page_url":"https://doi.org/10.14778/3561261.3561267","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":35,"referenced_works":["https://openalex.org/W650854417","https://openalex.org/W2032775418","https://openalex.org/W2044849727","https://openalex.org/W2072750586","https://openalex.org/W2146635036","https://openalex.org/W2247380138","https://openalex.org/W2284514301","https://openalex.org/W2292590056","https://openalex.org/W2396309311","https://openalex.org/W2428834396","https://openalex.org/W2444650685","https://openalex.org/W2547190417","https://openalex.org/W2548695101","https://openalex.org/W2563724055","https://openalex.org/W2610871042","https://openalex.org/W2612619809","https://openalex.org/W2776050171","https://openalex.org/W2798499404","https://openalex.org/W2998216295","https://openalex.org/W3030674916","https://openalex.org/W3030764521","https://openalex.org/W3034486793","https://openalex.org/W3037852608","https://openalex.org/W3097225903","https://openalex.org/W3126220825","https://openalex.org/W3175777295","https://openalex.org/W3176803066","https://openalex.org/W3196436244","https://openalex.org/W3198076459","https://openalex.org/W4206064074","https://openalex.org/W4210494082","https://openalex.org/W4283367762","https://openalex.org/W4292070111","https://openalex.org/W6779630500","https://openalex.org/W6838136454"],"related_works":["https://openalex.org/W2073547112","https://openalex.org/W2121516976","https://openalex.org/W4293525103","https://openalex.org/W3200179079","https://openalex.org/W3163334550","https://openalex.org/W1601264907","https://openalex.org/W3087493185","https://openalex.org/W2048841640","https://openalex.org/W1497652558","https://openalex.org/W2013702744"],"abstract_inverted_index":{"Successful":[0],"machine":[1],"learning":[2],"(ML)":[3],"needs":[4],"to":[5,46,94,130,154,169,190,219,240],"learn":[6],"from":[7],"good":[8,24],"data.":[9,285],"However,":[10,119],"one":[11],"common":[12],"issue":[13],"about":[14],"train":[15,57,86,105,125,198,284],"data":[16,58,106],"for":[17,208],"ML":[18,80,97,142,148],"practitioners":[19],"is":[20,32,53,69,88,91,127,203,249],"the":[21,55,66,116,172,176,188,192,196,205,212,242,245,264,275,281],"lack":[22],"of":[23,104,187,195,211,223,244,270],"features.":[25],"To":[26],"mitigate":[27],"this":[28,134],"problem,":[29],"feature":[30,67,144,161,221,234],"augmentation":[31,68,145],"often":[33,92],"employed":[34],"by":[35,266],"joining":[36],"with":[37,82,252,280],"(or":[38,73,163],"enriching":[39],"features":[40],"from)":[41],"multiple":[42],"tables,":[43],"so":[44],"as":[45,114,278],"become":[47],"feature-rich":[48,141],"ML.":[49],"A":[50],"consequent":[51],"problem":[52],"that":[54,107,180,204,259],"enriched":[56],"may":[59],"contain":[60],"too":[61],"many":[62],"tuples,":[63],"especially":[64],"if":[65],"obtained":[70],"through":[71,143,149],"1":[72],"many)-to-many":[74],"or":[75],"fuzzy":[76],"joins.":[77],"Training":[78],"an":[79],"model":[81],"a":[83,101,123,160],"very":[84],"large":[85,124],"dataset":[87,126],"data-inefficient.":[89],"Coreset":[90],"used":[93],"achieve":[95],"data-efficient":[96,147],"training,":[98],"which":[99,248],"selects":[100],"small":[102],"subset":[103,189],"can":[108,215,237,262],"theoretically":[109],"and":[110,146],"practically":[111],"perform":[112],"similarly":[113],"using":[115],"full":[117,193],"dataset.":[118,199],"coreset":[120,150,157,173,181,209],"selection":[121,158,182,210],"over":[122,159],"also":[128],"known":[129],"be":[131,216,238],"time-consuming.":[132],"In":[133,152],"paper,":[135],"we":[136,167],"aim":[137],"at":[138],"achieving":[139],"both":[140],"selection.":[151],"order":[153],"avoid":[155],"time-consuming":[156],"augmented":[162,177,213,246,283],"fully":[164,282],"materialized)":[165],"table,":[166,228,247],"propose":[168],"efficiently":[170],"select":[171],"without":[174,229],"materializing":[175],"table.":[178],"Note":[179],"typically":[183],"uses":[184],"weighted":[185],"gradients":[186],"approximate":[191],"gradient":[194,206,243],"entire":[197],"Our":[200],"key":[201],"idea":[202],"computation":[207],"table":[214],"pushed":[217],"down":[218],"partial":[220,233],"similarity":[222,235],"tuples":[224],"within":[225],"each":[226],"individual":[227],"join":[230],"materialization.":[231],"These":[232],"values":[236],"aggregated":[239],"estimate":[241],"upper":[250],"bounded":[251],"provable":[253],"theoretical":[254],"guarantees.":[255],"Extensive":[256],"experiments":[257],"show":[258],"our":[260],"method":[261],"improve":[263],"efficiency":[265],"nearly":[267],"2":[268],"orders":[269],"magnitudes,":[271],"while":[272],"keeping":[273],"almost":[274],"same":[276],"accuracy":[277],"training":[279]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":8},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":1}],"updated_date":"2026-03-26T15:22:09.906841","created_date":"2025-10-10T00:00:00"}
