{"id":"https://openalex.org/W4233226427","doi":"https://doi.org/10.14778/3275366.3275374","title":"ICARUS","display_name":"ICARUS","publication_year":2018,"publication_date":"2018-09-01","ids":{"openalex":"https://openalex.org/W4233226427","doi":"https://doi.org/10.14778/3275366.3275374"},"language":"en","primary_location":{"id":"doi:10.14778/3275366.3275374","is_oa":false,"landing_page_url":"https://doi.org/10.14778/3275366.3275374","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5031877441","display_name":"Protiva Rahman","orcid":"https://orcid.org/0000-0003-0000-8558"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Protiva Rahman","raw_affiliation_strings":["The Ohio State University"],"affiliations":[{"raw_affiliation_string":"The Ohio State University","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003341743","display_name":"Courtney Hebert","orcid":"https://orcid.org/0000-0003-1646-702X"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Courtney Hebert","raw_affiliation_strings":["The Ohio State University"],"affiliations":[{"raw_affiliation_string":"The Ohio State University","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5001906560","display_name":"Arnab Nandi","orcid":"https://orcid.org/0000-0002-4138-603X"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Arnab Nandi","raw_affiliation_strings":["The Ohio State University"],"affiliations":[{"raw_affiliation_string":"The Ohio State University","institution_ids":["https://openalex.org/I52357470"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5031877441"],"corresponding_institution_ids":["https://openalex.org/I52357470"],"apc_list":null,"apc_paid":null,"fwci":0.3927,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.7822162,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"11","issue":"13","first_page":"2263","last_page":"2276"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.989300012588501,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9891999959945679,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/missing-data","display_name":"Missing data","score":0.9023622870445251},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.765721321105957},{"id":"https://openalex.org/keywords/imputation","display_name":"Imputation (statistics)","score":0.7620583772659302},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.638285219669342},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6054081916809082},{"id":"https://openalex.org/keywords/schema","display_name":"Schema (genetic algorithms)","score":0.46116727590560913},{"id":"https://openalex.org/keywords/heuristic","display_name":"Heuristic","score":0.44025540351867676},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.43214085698127747},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3715837001800537},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.2707803249359131},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.25651252269744873},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.10643735527992249}],"concepts":[{"id":"https://openalex.org/C9357733","wikidata":"https://www.wikidata.org/wiki/Q6878417","display_name":"Missing data","level":2,"score":0.9023622870445251},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.765721321105957},{"id":"https://openalex.org/C58041806","wikidata":"https://www.wikidata.org/wiki/Q1660484","display_name":"Imputation (statistics)","level":3,"score":0.7620583772659302},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.638285219669342},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6054081916809082},{"id":"https://openalex.org/C52146309","wikidata":"https://www.wikidata.org/wiki/Q7431116","display_name":"Schema (genetic algorithms)","level":2,"score":0.46116727590560913},{"id":"https://openalex.org/C173801870","wikidata":"https://www.wikidata.org/wiki/Q201413","display_name":"Heuristic","level":2,"score":0.44025540351867676},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.43214085698127747},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3715837001800537},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2707803249359131},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.25651252269744873},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.10643735527992249},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.14778/3275366.3275374","is_oa":false,"landing_page_url":"https://doi.org/10.14778/3275366.3275374","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2181530120","https://openalex.org/W4211215373","https://openalex.org/W2024529227","https://openalex.org/W1574575415","https://openalex.org/W3144172081","https://openalex.org/W3179858851","https://openalex.org/W3028371478","https://openalex.org/W2081476516","https://openalex.org/W2581984549","https://openalex.org/W3123177881"],"abstract_inverted_index":{"An":[0],"important":[1],"step":[2],"in":[3,111,137,168,181,240],"data":[4,73,89,104,139,182,244],"preparation":[5],"involves":[6],"dealing":[7],"with":[8,69,122],"incomplete":[9],"datasets.":[10],"In":[11],"some":[12],"cases,":[13],"the":[14,24,36,88,103,108,120,133,161,166,169,176,193,199,208,227],"missing":[15,37,60,72,113,138,204,243],"values":[16,61],"are":[17,21,27],"unreported":[18],"because":[19],"they":[20],"characteristics":[22],"of":[23,35,71,87,102,135,165,171,221,242],"domain":[25,49],"and":[26,40,46,78,90],"known":[28],"by":[29,183,206],"practitioners.":[30],"Due":[31],"to":[32,58,84,98,125,159,178,192,202,211],"this":[33],"nature":[34],"values,":[38],"imputation":[39],"inference":[41],"methods":[42],"do":[43],"not":[44],"work":[45],"input":[47,201],"from":[48,140],"experts":[50,57],"is":[51,62,76,144],"required.":[52],"A":[53],"common":[54],"method":[55],"for":[56,66,81],"fill":[59,180,239],"through":[63],"rules.":[64,94],"However,":[65],"large":[67,142],"datasets":[68,225],"thousands":[70],"points,":[74],"it":[75],"laborious":[77],"time":[79],"consuming":[80],"a":[82,141,156,172],"user":[83,121,162,177,232],"make":[85,126],"sense":[86],"formulate":[91],"effective":[92],"completion":[93],"Thus,":[95],"users":[96,237],"need":[97],"be":[99],"shown":[100],"subsets":[101,117,130,164],"that":[105,131,235],"will":[106],"have":[107],"most":[109],"impact":[110],"completing":[112],"fields.":[114],"Further,":[115,230],"these":[116,149],"should":[118],"provide":[119],"enough":[123],"information":[124],"an":[127,218,246],"update.":[128],"Choosing":[129],"maximize":[132],"probability":[134],"filling":[136],"dataset":[143],"computationally":[145],"expensive.":[146],"To":[147],"address":[148],"challenges,":[150],"we":[151],"present":[152],"Icarus,":[153],"which":[154],"uses":[155],"heuristic":[157],"algorithm":[158],"show":[160,215],"small":[163],"database":[167,209],"form":[170],"matrix.":[173,194],"This":[174],"allows":[175],"iteratively":[179],"applying":[184],"suggested":[185,196],"rules":[186,197],"based":[187],"on":[188],"their":[189],"direct":[190],"edits":[191],"The":[195],"amplify":[198],"users'":[200],"multiple":[203],"fields":[205],"using":[207],"schema":[210],"infer":[212],"hierarchies.":[213],"Simulations":[214],"Icarus":[216],"has":[217],"average":[219],"improvement":[220],"50%":[222],"across":[223],"three":[224],"over":[226],"baseline":[228],"system.":[229],"in-person":[231],"studies":[233],"demonstrate":[234],"naive":[236],"can":[238],"68%":[241],"within":[245],"hour,":[247],"while":[248],"manual":[249],"rule":[250],"specification":[251],"spans":[252],"weeks.":[253]},"counts_by_year":[{"year":2020,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2022-05-12T00:00:00"}
