{"id":"https://openalex.org/W2897852178","doi":"https://doi.org/10.1145/3269206.3272005","title":"\"Deep\" Learning for Missing Value Imputationin Tables with Non-Numerical Data","display_name":"\"Deep\" Learning for Missing Value Imputationin Tables with Non-Numerical Data","publication_year":2018,"publication_date":"2018-10-17","ids":{"openalex":"https://openalex.org/W2897852178","doi":"https://doi.org/10.1145/3269206.3272005","mag":"2897852178"},"language":"en","primary_location":{"id":"doi:10.1145/3269206.3272005","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3269206.3272005","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 27th ACM International Conference on Information and Knowledge Management","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5024953386","display_name":"Felix Bie\u00dfmann","orcid":"https://orcid.org/0000-0002-3422-1026"},"institutions":[{"id":"https://openalex.org/I4210089985","display_name":"Amazon (Germany)","ror":"https://ror.org/00b9ktm87","country_code":"DE","type":"company","lineage":["https://openalex.org/I1311688040","https://openalex.org/I4210089985"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Felix Biessmann","raw_affiliation_strings":["Amazon Research, Berlin, Germany"],"affiliations":[{"raw_affiliation_string":"Amazon Research, Berlin, Germany","institution_ids":["https://openalex.org/I4210089985"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078696011","display_name":"David Salinas","orcid":"https://orcid.org/0000-0002-8980-4018"},"institutions":[{"id":"https://openalex.org/I4210089985","display_name":"Amazon (Germany)","ror":"https://ror.org/00b9ktm87","country_code":"DE","type":"company","lineage":["https://openalex.org/I1311688040","https://openalex.org/I4210089985"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"David Salinas","raw_affiliation_strings":["Amazon Research, Berlin, Germany"],"affiliations":[{"raw_affiliation_string":"Amazon Research, Berlin, Germany","institution_ids":["https://openalex.org/I4210089985"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090934117","display_name":"Sebastian Schelter","orcid":"https://orcid.org/0000-0003-4722-5840"},"institutions":[{"id":"https://openalex.org/I4210089985","display_name":"Amazon (Germany)","ror":"https://ror.org/00b9ktm87","country_code":"DE","type":"company","lineage":["https://openalex.org/I1311688040","https://openalex.org/I4210089985"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Sebastian Schelter","raw_affiliation_strings":["Amazon Research, Berlin, Germany"],"affiliations":[{"raw_affiliation_string":"Amazon Research, Berlin, Germany","institution_ids":["https://openalex.org/I4210089985"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109246682","display_name":"Philipp Schmidt","orcid":null},"institutions":[{"id":"https://openalex.org/I4210089985","display_name":"Amazon (Germany)","ror":"https://ror.org/00b9ktm87","country_code":"DE","type":"company","lineage":["https://openalex.org/I1311688040","https://openalex.org/I4210089985"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Philipp Schmidt","raw_affiliation_strings":["Amazon Research, Berlin, Germany"],"affiliations":[{"raw_affiliation_string":"Amazon Research, Berlin, Germany","institution_ids":["https://openalex.org/I4210089985"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5070190076","display_name":"Dustin Lange","orcid":null},"institutions":[{"id":"https://openalex.org/I4210089985","display_name":"Amazon (Germany)","ror":"https://ror.org/00b9ktm87","country_code":"DE","type":"company","lineage":["https://openalex.org/I1311688040","https://openalex.org/I4210089985"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Dustin Lange","raw_affiliation_strings":["Amazon Research, Berlin, Germany"],"affiliations":[{"raw_affiliation_string":"Amazon Research, Berlin, Germany","institution_ids":["https://openalex.org/I4210089985"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5024953386"],"corresponding_institution_ids":["https://openalex.org/I4210089985"],"apc_list":null,"apc_paid":null,"fwci":5.7171,"has_fulltext":false,"cited_by_count":65,"citation_normalized_percentile":{"value":0.96116815,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"2017","last_page":"2025"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9789999723434448,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9729999899864197,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/imputation","display_name":"Imputation (statistics)","score":0.8183648586273193},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7617945671081543},{"id":"https://openalex.org/keywords/missing-data","display_name":"Missing data","score":0.7126948237419128},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5845940709114075},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5508404970169067},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.4695739448070526},{"id":"https://openalex.org/keywords/rendering","display_name":"Rendering (computer graphics)","score":0.46416452527046204},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4470576047897339},{"id":"https://openalex.org/keywords/plug-in","display_name":"Plug-in","score":0.417807936668396},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.40145522356033325},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.09787654876708984}],"concepts":[{"id":"https://openalex.org/C58041806","wikidata":"https://www.wikidata.org/wiki/Q1660484","display_name":"Imputation (statistics)","level":3,"score":0.8183648586273193},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7617945671081543},{"id":"https://openalex.org/C9357733","wikidata":"https://www.wikidata.org/wiki/Q6878417","display_name":"Missing data","level":2,"score":0.7126948237419128},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5845940709114075},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5508404970169067},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.4695739448070526},{"id":"https://openalex.org/C205711294","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Rendering (computer graphics)","level":2,"score":0.46416452527046204},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4470576047897339},{"id":"https://openalex.org/C4924752","wikidata":"https://www.wikidata.org/wiki/Q184148","display_name":"Plug-in","level":2,"score":0.417807936668396},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.40145522356033325},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.09787654876708984},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3269206.3272005","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3269206.3272005","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 27th ACM International Conference on Information and Knowledge Management","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W48643329","https://openalex.org/W1522301498","https://openalex.org/W1983479840","https://openalex.org/W2044758663","https://openalex.org/W2046298800","https://openalex.org/W2054141820","https://openalex.org/W2063103859","https://openalex.org/W2064675550","https://openalex.org/W2096863518","https://openalex.org/W2118502261","https://openalex.org/W2146130798","https://openalex.org/W2146332392","https://openalex.org/W2162313689","https://openalex.org/W2186615578","https://openalex.org/W2189465200","https://openalex.org/W2357449897","https://openalex.org/W2437990191","https://openalex.org/W2510090379","https://openalex.org/W2526273923","https://openalex.org/W2591700809","https://openalex.org/W2604272474","https://openalex.org/W2610751324","https://openalex.org/W2613282090","https://openalex.org/W2613597870","https://openalex.org/W2618530766","https://openalex.org/W2753069234","https://openalex.org/W2766369253","https://openalex.org/W2950133940","https://openalex.org/W2963626623","https://openalex.org/W2997591727","https://openalex.org/W3003365835","https://openalex.org/W6677724928"],"related_works":["https://openalex.org/W2181530120","https://openalex.org/W4211215373","https://openalex.org/W2024529227","https://openalex.org/W1574575415","https://openalex.org/W3144172081","https://openalex.org/W3179858851","https://openalex.org/W3028371478","https://openalex.org/W2081476516","https://openalex.org/W2581984549","https://openalex.org/W3123177881"],"abstract_inverted_index":{"The":[0,201],"success":[1],"of":[2,12,17,134,146,152,207,229],"applications":[3],"that":[4,71,108,182],"process":[5],"data":[6,19,81,88,93,125,153,230,233],"critically":[7],"depends":[8],"on":[9,86,124,136,158,189],"the":[10,13,109,208],"quality":[11],"ingested":[14],"data.":[15,257],"Completeness":[16],"a":[18,64,97,132,137,141,149,160,196,221],"source":[20],"is":[21,112,131,235],"essential":[22],"in":[23,59,82,101,213,227,244,253],"many":[24],"cases.":[25],"Yet,":[26],"most":[27],"missing":[28,251],"value":[29],"imputation":[30,50,70,143,210],"approaches":[31],"suffer":[32],"from":[33,96],"severe":[34],"limitations.":[35],"They":[36],"are":[37,53],"almost":[38],"exclusively":[39],"restricted":[40],"to":[41,55,69,73,164,172,249],"numerical":[42],"data,":[43],"and":[44,57,66,105,115],"they":[45],"either":[46],"offer":[47],"only":[48],"simple":[49,184],"methods":[51,194],"or":[52],"difficult":[54],"scale":[56],"maintain":[58],"production.":[60],"Here":[61],"we":[62,180],"present":[63],"robust":[65],"scalable":[67,114],"approach":[68,111,156],"extends":[72],"tables":[74,254],"with":[75,127,191,255],"non-numerical":[76,256],"values,":[77],"including":[78],"unstructured":[79],"text":[80],"diverse":[83],"languages.":[84],"Experiments":[85],"public":[87],"sets":[89,94,126,154],"as":[90,92,220,237,239],"well":[91,238],"sampled":[95],"large":[98],"product":[99],"catalog":[100],"different":[102],"languages":[103],"(English":[104],"Japanese)":[106],"demonstrate":[107],"proposed":[110,202],"both":[113,224],"yields":[116],"more":[117],"accurate":[118],"imputations":[119],"than":[120],"previous":[121],"approaches.":[122],"Training":[123],"several":[128],"million":[129],"rows":[130],"matter":[133],"minutes":[135],"single":[138],"machine.":[139],"With":[140],"median":[142],"F1":[144],"score":[145],"0.93":[147],"across":[148],"broad":[150],"selection":[151],"our":[155,168],"achieves":[157],"average":[159],"23-fold":[161],"improvement":[162],"compared":[163],"mode":[165],"imputation.":[166],"While":[167],"system":[169],"allows":[170],"users":[171],"apply":[173],"state-of-the-art":[174],"deep":[175,192],"learning":[176,193,246],"models":[177,187],"if":[178],"needed,":[179],"find":[181],"often":[183],"linear":[185],"n-gram":[186],"perform":[188],"par":[190],"at":[195],"much":[197],"lower":[198],"operational":[199],"cost.":[200],"method":[203],"learns":[204],"all":[205],"parameters":[206],"entire":[209],"pipeline":[211],"automatically":[212],"an":[214],"end-to-end":[215],"fashion,":[216],"rendering":[217],"it":[218],"attractive":[219],"generic":[222],"plugin":[223],"for":[225,240],"engineers":[226],"charge":[228],"pipelines":[231],"where":[232],"completeness":[234],"relevant,":[236],"practitioners":[241],"without":[242],"expertise":[243],"machine":[245],"who":[247],"need":[248],"impute":[250],"values":[252]},"counts_by_year":[{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":10},{"year":2023,"cited_by_count":10},{"year":2022,"cited_by_count":12},{"year":2021,"cited_by_count":15},{"year":2020,"cited_by_count":10},{"year":2019,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
