{"id":"https://openalex.org/W2992897306","doi":"https://doi.org/10.1145/3336191.3371813","title":"AutoBlock","display_name":"AutoBlock","publication_year":2020,"publication_date":"2020-01-20","ids":{"openalex":"https://openalex.org/W2992897306","doi":"https://doi.org/10.1145/3336191.3371813","mag":"2992897306"},"language":"en","primary_location":{"id":"doi:10.1145/3336191.3371813","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3336191.3371813","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 13th International Conference on Web Search and Data Mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1912.03417","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Wei Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I135310074","display_name":"University of Wisconsin\u2013Madison","ror":"https://ror.org/01y2jtd41","country_code":"US","type":"education","lineage":["https://openalex.org/I135310074"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Wei Zhang","raw_affiliation_strings":["University of Wisconsin-Madison, Madison, WI, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Wisconsin-Madison, Madison, WI, USA","institution_ids":["https://openalex.org/I135310074"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Hao Wei","orcid":null},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hao Wei","raw_affiliation_strings":["Amazon.com, Seattle, WA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Amazon.com, Seattle, WA, USA","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Bunyamin Sisman","orcid":null},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Bunyamin Sisman","raw_affiliation_strings":["Amazon.com, Seattle, WA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Amazon.com, Seattle, WA, USA","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Xin Luna Dong","orcid":null},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xin Luna Dong","raw_affiliation_strings":["Amazon.com, Seattle, WA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Amazon.com, Seattle, WA, USA","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Christos Faloutsos","orcid":null},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Christos Faloutsos","raw_affiliation_strings":["Carnegie Mellon University, Pittsburgh, PA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University, Pittsburgh, PA, USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"last","author":{"id":null,"display_name":"Davd Page","orcid":null},"institutions":[{"id":"https://openalex.org/I170897317","display_name":"Duke University","ror":"https://ror.org/00py81415","country_code":"US","type":"education","lineage":["https://openalex.org/I170897317"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Davd Page","raw_affiliation_strings":["Duke University, Durham, NC, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Duke University, Durham, NC, USA","institution_ids":["https://openalex.org/I170897317"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I135310074"],"apc_list":null,"apc_paid":null,"fwci":3.1583,"has_fulltext":false,"cited_by_count":38,"citation_normalized_percentile":{"value":0.91770195,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"744","last_page":"752"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9020000100135803,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/blocking","display_name":"Blocking (statistics)","score":0.6575999855995178},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5547000169754028},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.527400016784668},{"id":"https://openalex.org/keywords/record-linkage","display_name":"Record linkage","score":0.37279999256134033},{"id":"https://openalex.org/keywords/component","display_name":"Component (thermodynamics)","score":0.2646999955177307}],"concepts":[{"id":"https://openalex.org/C144745244","wikidata":"https://www.wikidata.org/wiki/Q4927286","display_name":"Blocking (statistics)","level":2,"score":0.6575999855995178},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6075000166893005},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5547000169754028},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.527400016784668},{"id":"https://openalex.org/C142210648","wikidata":"https://www.wikidata.org/wiki/Q1266546","display_name":"Record linkage","level":3,"score":0.37279999256134033},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.34950000047683716},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.31049999594688416},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.2646999955177307},{"id":"https://openalex.org/C3020493868","wikidata":"https://www.wikidata.org/wiki/Q55631277","display_name":"Real world data","level":2,"score":0.2410999983549118},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.23839999735355377}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3336191.3371813","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3336191.3371813","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 13th International Conference on Web Search and Data Mining","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1912.03417","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1912.03417","pdf_url":"https://arxiv.org/pdf/1912.03417","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1912.03417","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1912.03417","pdf_url":"https://arxiv.org/pdf/1912.03417","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":23,"referenced_works":["https://openalex.org/W1482991935","https://openalex.org/W2012833704","https://openalex.org/W2017851434","https://openalex.org/W2031250218","https://openalex.org/W2079649893","https://openalex.org/W2105016630","https://openalex.org/W2111116800","https://openalex.org/W2139490252","https://openalex.org/W2139646386","https://openalex.org/W2147717514","https://openalex.org/W2152502401","https://openalex.org/W2250539671","https://openalex.org/W2397525010","https://openalex.org/W2397770138","https://openalex.org/W2493916176","https://openalex.org/W2529367823","https://openalex.org/W2535168187","https://openalex.org/W2612732335","https://openalex.org/W3011807731","https://openalex.org/W6632038546","https://openalex.org/W6677953180","https://openalex.org/W6739901393","https://openalex.org/W6842097421"],"related_works":[],"abstract_inverted_index":{"Entity":[0],"matching":[1,23],"seeks":[2],"to":[3,15,39],"identify":[4],"data":[5,11,65],"records":[6],"over":[7],"one":[8],"or":[9],"multiple":[10],"sources":[12],"that":[13,32],"refer":[14],"the":[16,34,45],"same":[17],"real-world":[18],"entity.":[19],"Virtually":[20],"every":[21],"entity":[22],"task":[24],"on":[25,59],"large":[26],"datasets":[27],"requires":[28],"blocking,":[29],"a":[30],"step":[31],"reduces":[33],"number":[35],"of":[36,44],"record":[37],"pairs":[38],"be":[40],"matched.":[41],"However,":[42],"most":[43],"traditional":[46],"blocking":[47,68],"methods":[48],"are":[49,56],"learning-free":[50],"and":[51,53,66],"key-based,":[52],"their":[54],"successes":[55],"largely":[57],"built":[58],"laborious":[60],"human":[61],"effort":[62],"in":[63],"cleaning":[64],"designing":[67],"keys.":[69]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":10},{"year":2024,"cited_by_count":10},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":9},{"year":2020,"cited_by_count":1}],"updated_date":"2026-04-28T14:05:53.105641","created_date":"2019-12-13T00:00:00"}
