{"id":"https://openalex.org/W3161168558","doi":"https://doi.org/10.1145/3447541","title":"TabReformer: Unsupervised Representation Learning for Erroneous Data Detection","display_name":"TabReformer: Unsupervised Representation Learning for Erroneous Data Detection","publication_year":2021,"publication_date":"2021-05-18","ids":{"openalex":"https://openalex.org/W3161168558","doi":"https://doi.org/10.1145/3447541","mag":"3161168558"},"language":"en","primary_location":{"id":"doi:10.1145/3447541","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3447541","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3447541","source":{"id":"https://openalex.org/S4210185969","display_name":"ACM/IMS Transactions on Data Science","issn_l":"2577-3224","issn":["2577-3224","2691-1922"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM/IMS Transactions on Data Science","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3447541","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5032311062","display_name":"Mona Nashaat","orcid":"https://orcid.org/0000-0002-7580-5757"},"institutions":[{"id":"https://openalex.org/I154425047","display_name":"University of Alberta","ror":"https://ror.org/0160cpw27","country_code":"CA","type":"education","lineage":["https://openalex.org/I154425047"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Mona Nashaat","raw_affiliation_strings":["Electrical and Computer Engineering, University of Alberta, Edmonton, Alberta, Canada"],"raw_orcid":"https://orcid.org/0000-0002-7580-5757","affiliations":[{"raw_affiliation_string":"Electrical and Computer Engineering, University of Alberta, Edmonton, Alberta, Canada","institution_ids":["https://openalex.org/I154425047"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022645420","display_name":"Aindrila Ghosh","orcid":"https://orcid.org/0000-0003-4908-9491"},"institutions":[{"id":"https://openalex.org/I154425047","display_name":"University of Alberta","ror":"https://ror.org/0160cpw27","country_code":"CA","type":"education","lineage":["https://openalex.org/I154425047"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Aindrila Ghosh","raw_affiliation_strings":["Electrical and Computer Engineering, University of Alberta, Edmonton, Alberta, Canada"],"raw_orcid":"https://orcid.org/0000-0003-4908-9491","affiliations":[{"raw_affiliation_string":"Electrical and Computer Engineering, University of Alberta, Edmonton, Alberta, Canada","institution_ids":["https://openalex.org/I154425047"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030495591","display_name":"James Miller","orcid":"https://orcid.org/0000-0001-5095-3000"},"institutions":[{"id":"https://openalex.org/I154425047","display_name":"University of Alberta","ror":"https://ror.org/0160cpw27","country_code":"CA","type":"education","lineage":["https://openalex.org/I154425047"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"James Miller","raw_affiliation_strings":["Electrical and Computer Engineering, University of Alberta, Edmonton, Alberta, Canada"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Electrical and Computer Engineering, University of Alberta, Edmonton, Alberta, Canada","institution_ids":["https://openalex.org/I154425047"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5031825849","display_name":"Shaikh Quader","orcid":null},"institutions":[{"id":"https://openalex.org/I4210113654","display_name":"IBM (Canada)","ror":"https://ror.org/025sxka56","country_code":"CA","type":"company","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210113654"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Shaikh Quader","raw_affiliation_strings":["IBM Canada Software Lab, IBM Canada, Toronto, Ontario, Canada"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IBM Canada Software Lab, IBM Canada, Toronto, Ontario, Canada","institution_ids":["https://openalex.org/I4210113654"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.6997,"has_fulltext":true,"cited_by_count":8,"citation_normalized_percentile":{"value":0.75716002,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":"2","issue":"3","first_page":"1","last_page":"29"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8118667602539062},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.6033161878585815},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.5439813137054443},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5356414318084717},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.5275464057922363},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.5272311568260193},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4874765872955322},{"id":"https://openalex.org/keywords/tuple","display_name":"Tuple","score":0.4825839102268219},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.47821447253227234},{"id":"https://openalex.org/keywords/external-data-representation","display_name":"External Data Representation","score":0.47002553939819336},{"id":"https://openalex.org/keywords/mixture-model","display_name":"Mixture model","score":0.4697915315628052},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.45290738344192505},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4264778792858124},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.0978911817073822}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8118667602539062},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.6033161878585815},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.5439813137054443},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5356414318084717},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5275464057922363},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.5272311568260193},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4874765872955322},{"id":"https://openalex.org/C118930307","wikidata":"https://www.wikidata.org/wiki/Q600590","display_name":"Tuple","level":2,"score":0.4825839102268219},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.47821447253227234},{"id":"https://openalex.org/C116409475","wikidata":"https://www.wikidata.org/wiki/Q1385056","display_name":"External Data Representation","level":2,"score":0.47002553939819336},{"id":"https://openalex.org/C61224824","wikidata":"https://www.wikidata.org/wiki/Q2260434","display_name":"Mixture model","level":2,"score":0.4697915315628052},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.45290738344192505},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4264778792858124},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0978911817073822},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C118615104","wikidata":"https://www.wikidata.org/wiki/Q121416","display_name":"Discrete mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3447541","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3447541","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3447541","source":{"id":"https://openalex.org/S4210185969","display_name":"ACM/IMS Transactions on Data Science","issn_l":"2577-3224","issn":["2577-3224","2691-1922"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM/IMS Transactions on Data Science","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3447541","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3447541","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3447541","source":{"id":"https://openalex.org/S4210185969","display_name":"ACM/IMS Transactions on Data Science","issn_l":"2577-3224","issn":["2577-3224","2691-1922"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM/IMS Transactions on Data Science","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.4000000059604645,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3161168558.pdf","grobid_xml":"https://content.openalex.org/works/W3161168558.grobid-xml"},"referenced_works_count":56,"referenced_works":["https://openalex.org/W760598031","https://openalex.org/W2021732807","https://openalex.org/W2023639956","https://openalex.org/W2044469685","https://openalex.org/W2046298800","https://openalex.org/W2270070752","https://openalex.org/W2298871042","https://openalex.org/W2425316268","https://openalex.org/W2437617937","https://openalex.org/W2502312327","https://openalex.org/W2544486974","https://openalex.org/W2548122763","https://openalex.org/W2553303224","https://openalex.org/W2591700809","https://openalex.org/W2599674900","https://openalex.org/W2767280887","https://openalex.org/W2767989436","https://openalex.org/W2778368504","https://openalex.org/W2778828356","https://openalex.org/W2784313390","https://openalex.org/W2889249015","https://openalex.org/W2897667608","https://openalex.org/W2899663614","https://openalex.org/W2900611605","https://openalex.org/W2914649379","https://openalex.org/W2929941791","https://openalex.org/W2943955885","https://openalex.org/W2948145720","https://openalex.org/W2948271276","https://openalex.org/W2949736877","https://openalex.org/W2950704571","https://openalex.org/W2955045665","https://openalex.org/W2962369866","https://openalex.org/W2962736273","https://openalex.org/W2963045681","https://openalex.org/W2963307331","https://openalex.org/W2963383025","https://openalex.org/W2964229681","https://openalex.org/W2987471611","https://openalex.org/W2989565627","https://openalex.org/W2995619225","https://openalex.org/W2997756720","https://openalex.org/W3004034804","https://openalex.org/W3004922004","https://openalex.org/W3008440336","https://openalex.org/W3008736634","https://openalex.org/W3009459039","https://openalex.org/W3020873385","https://openalex.org/W3030026364","https://openalex.org/W3035682985","https://openalex.org/W3099714394","https://openalex.org/W3105977086","https://openalex.org/W4214644094","https://openalex.org/W4229501640","https://openalex.org/W4245067498","https://openalex.org/W4299133070"],"related_works":["https://openalex.org/W2393486890","https://openalex.org/W2114760689","https://openalex.org/W1987640280","https://openalex.org/W2055387065","https://openalex.org/W3001496086","https://openalex.org/W2758796827","https://openalex.org/W3199974879","https://openalex.org/W3001095776","https://openalex.org/W2112070477","https://openalex.org/W2109581903"],"abstract_inverted_index":{"Error":[0,90],"detection":[1,14,26],"is":[2],"a":[3,44,88,125,143],"crucial":[4],"preliminary":[5],"phase":[6],"in":[7,38],"any":[8],"data":[9,120,126],"analytics":[10],"pipeline.":[11],"Existing":[12],"error":[13],"techniques":[15,174],"typically":[16],"target":[17],"specific":[18],"types":[19,150],"of":[20,24,58,118,146,151],"errors.":[21],"Moreover,":[22],"most":[23],"these":[25],"models":[27],"either":[28],"require":[29],"user-defined":[30],"rules":[31],"or":[32],"ample":[33],"hand-labeled":[34],"training":[35],"examples.":[36],"Therefore,":[37],"this":[39],"article,":[40],"we":[41],"present":[42],"TabReformer,":[43],"model":[45,56,75,86,111,123],"that":[46,159],"learns":[47],"bidirectional":[48],"encoder":[49,68],"representations":[50],"for":[51,115],"tabular":[52],"data.":[53],"The":[54,122,139,155],"proposed":[55],"consists":[57],"two":[59],"main":[60],"phases.":[61],"In":[62,106],"the":[63,76,85,96,107,110,116,136,164,177],"first":[64],"phase,":[65,109],"TabReformer":[66],"follows":[67],"architecture":[69],"with":[70,95,148,172],"multiple":[71],"self-attention":[72],"layers":[73],"to":[74,101,129,134,182],"dependencies":[77],"between":[78],"cells":[79],"and":[80,153],"capture":[81],"tuple-level":[82],"representations.":[83],"Also,":[84],"utilizes":[87],"Gaussian":[89],"Linear":[91],"Unit":[92],"activation":[93],"function":[94],"Masked":[97],"Data":[98],"Model":[99],"objective":[100],"achieve":[102],"deeper":[103],"probabilistic":[104],"understanding.":[105],"second":[108],"parameters":[112],"are":[113],"fine-tuned":[114],"task":[117],"erroneous":[119,132],"detection.":[121],"applies":[124],"augmentation":[127],"module":[128],"generate":[130],"more":[131],"examples":[133],"represent":[135],"minority":[137],"class.":[138],"experimental":[140],"evaluation":[141],"considers":[142],"wide":[144],"range":[145],"databases":[147],"different":[149],"errors":[152],"distributions.":[154],"empirical":[156],"results":[157],"show":[158],"our":[160],"solution":[161],"can":[162],"enhance":[163],"recall":[165],"values":[166],"by":[167,180],"32.95%":[168],"on":[169],"average":[170],"compared":[171],"state-of-the-art":[173],"while":[175],"reducing":[176],"manual":[178],"effort":[179],"up":[181],"48.86%.":[183]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":5}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
