{"id":"https://openalex.org/W2907376131","doi":"https://doi.org/10.1109/icdm.2018.00054","title":"SuperPart: Supervised Graph Partitioning for Record Linkage","display_name":"SuperPart: Supervised Graph Partitioning for Record Linkage","publication_year":2018,"publication_date":"2018-11-01","ids":{"openalex":"https://openalex.org/W2907376131","doi":"https://doi.org/10.1109/icdm.2018.00054","mag":"2907376131"},"language":"en","primary_location":{"id":"doi:10.1109/icdm.2018.00054","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icdm.2018.00054","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE International Conference on Data Mining (ICDM)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5067834158","display_name":"Russell Reas","orcid":null},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Russell Reas","raw_affiliation_strings":["Amazon, Seattle, WA, USA"],"affiliations":[{"raw_affiliation_string":"Amazon, Seattle, WA, USA","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028498314","display_name":"Steve Ash","orcid":null},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Steve Ash","raw_affiliation_strings":["Amazon, Seattle, WA, USA"],"affiliations":[{"raw_affiliation_string":"Amazon, Seattle, WA, USA","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021733716","display_name":"Rob Barton","orcid":"https://orcid.org/0000-0001-8062-4224"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Rob Barton","raw_affiliation_strings":["Amazon, New York, NY, USA"],"affiliations":[{"raw_affiliation_string":"Amazon, New York, NY, USA","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5035143245","display_name":"Andrew Borthwick","orcid":"https://orcid.org/0000-0003-1192-0032"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Andrew Borthwick","raw_affiliation_strings":["Amazon, Seattle, WA, USA"],"affiliations":[{"raw_affiliation_string":"Amazon, Seattle, WA, USA","institution_ids":["https://openalex.org/I1311688040"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5067834158"],"corresponding_institution_ids":["https://openalex.org/I1311688040"],"apc_list":null,"apc_paid":null,"fwci":1.4822,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.84739149,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"1","issue":null,"first_page":"387","last_page":"396"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.9922000169754028,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.9854999780654907,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6638702750205994},{"id":"https://openalex.org/keywords/ground-truth","display_name":"Ground truth","score":0.5152021050453186},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.4912716746330261},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.48280012607574463},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.4793187379837036},{"id":"https://openalex.org/keywords/equivalence","display_name":"Equivalence (formal languages)","score":0.4432177245616913},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.3487456440925598},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.345306396484375},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3254837095737457},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3211915194988251},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.20919758081436157}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6638702750205994},{"id":"https://openalex.org/C146849305","wikidata":"https://www.wikidata.org/wiki/Q370766","display_name":"Ground truth","level":2,"score":0.5152021050453186},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.4912716746330261},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.48280012607574463},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.4793187379837036},{"id":"https://openalex.org/C2780069185","wikidata":"https://www.wikidata.org/wiki/Q7977945","display_name":"Equivalence (formal languages)","level":2,"score":0.4432177245616913},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3487456440925598},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.345306396484375},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3254837095737457},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3211915194988251},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.20919758081436157},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.0},{"id":"https://openalex.org/C118615104","wikidata":"https://www.wikidata.org/wiki/Q121416","display_name":"Discrete mathematics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icdm.2018.00054","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icdm.2018.00054","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE International Conference on Data Mining (ICDM)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":38,"referenced_works":["https://openalex.org/W192724328","https://openalex.org/W1137713401","https://openalex.org/W1480376833","https://openalex.org/W1602412127","https://openalex.org/W1676999057","https://openalex.org/W1981590391","https://openalex.org/W1995541812","https://openalex.org/W2002055996","https://openalex.org/W2007682403","https://openalex.org/W2022704179","https://openalex.org/W2024770506","https://openalex.org/W2072240081","https://openalex.org/W2073471108","https://openalex.org/W2112493011","https://openalex.org/W2117974736","https://openalex.org/W2123561513","https://openalex.org/W2124166542","https://openalex.org/W2127048411","https://openalex.org/W2135964318","https://openalex.org/W2139646386","https://openalex.org/W2147299132","https://openalex.org/W2148524305","https://openalex.org/W2150593711","https://openalex.org/W2261544779","https://openalex.org/W2262188337","https://openalex.org/W2397525010","https://openalex.org/W2399361902","https://openalex.org/W2748156246","https://openalex.org/W2799004609","https://openalex.org/W3102641634","https://openalex.org/W3214957292","https://openalex.org/W4213214460","https://openalex.org/W4230502578","https://openalex.org/W4254788633","https://openalex.org/W6607776381","https://openalex.org/W6627425218","https://openalex.org/W6637459119","https://openalex.org/W6712521661"],"related_works":["https://openalex.org/W4295532600","https://openalex.org/W2063823869","https://openalex.org/W2389214306","https://openalex.org/W2047973478","https://openalex.org/W2067569035","https://openalex.org/W2090985514","https://openalex.org/W4235240664","https://openalex.org/W2965083567","https://openalex.org/W1838576100","https://openalex.org/W2095886385"],"abstract_inverted_index":{"Identifying":[0],"sets":[1],"of":[2,35,41,55,71,121,145,152,165],"items":[3],"that":[4,137,177],"are":[5],"equivalent":[6,147],"to":[7,14,68,80,117,132,185],"one":[8],"another":[9],"is":[10,65,89],"a":[11,25,47,97,107,118,127,179],"problem":[12,82,144],"common":[13],"many":[15],"fields.":[16],"Systems":[17],"addressing":[18],"this":[19,60,190],"generally":[20,105],"have":[21,83],"at":[22],"their":[23,110],"core":[24],"function":[26],"s(d_i,":[27],"d_j)":[28],"for":[29],"computing":[30],"the":[31,53,69,81,101,143,163,173],"similarity":[32],"between":[33],"pairs":[34],"records":[36,57,148],"d_i,":[37],"d_j.":[38],"The":[39],"output":[40,106],"s()":[42],"can":[43],"be":[44,94],"interpreted":[45],"as":[46],"weighted":[48],"graph":[49,61,133],"where":[50],"edges":[51],"indicate":[52],"likelihood":[54],"two":[56],"matching.":[58],"Partitioning":[59],"into":[62],"equivalence":[63],"classes":[64],"non-trivial":[66],"due":[67],"presence":[70],"inconsistencies":[72],"and":[73,112],"imperfections":[74],"in":[75,109,189],"s().":[76],"Numerous":[77],"algorithmic":[78],"approaches":[79],"been":[84],"proposed,":[85],"but":[86],"(1)":[87],"it":[88],"unclear":[90],"which":[91],"approach":[92,131],"should":[93],"used":[95],"on":[96,142],"given":[98],"dataset;":[99],"(2)":[100],"algorithms":[102,153],"do":[103],"not":[104],"confidence":[108,167],"decisions;":[111],"(3)":[113],"require":[114],"error-prone":[115],"tuning":[116],"particular":[119],"notion":[120],"ground":[122],"truth.":[123],"We":[124,135],"present":[125],"SuperPart,":[126],"scalable,":[128],"supervised":[129],"learning":[130],"partitioning.":[134],"demonstrate":[136],"SuperPart":[138],"yields":[139],"competitive":[140],"results":[141],"detecting":[146],"without":[149],"manual":[150],"selection":[151],"or":[154],"an":[155],"exhaustive":[156],"search":[157],"over":[158],"hyperparameters.":[159],"Also,":[160],"we":[161,192],"show":[162],"quality":[164],"SuperPart's":[166],"measures":[168],"by":[169,182],"reporting":[170],"Area":[171],"Under":[172],"Precision-Recall":[174],"Curve":[175],"metrics":[176],"exceed":[178],"baseline":[180],"measure":[181],"11%.":[183],"Finally,":[184],"bolster":[186],"additional":[187],"research":[188],"domain,":[191],"release":[193],"three":[194],"new":[195],"datasets":[196],"derived":[197],"from":[198],"real-world":[199],"Amazon":[200],"product":[201],"data":[202],"along":[203],"with":[204],"ground-truth":[205],"partitionings.":[206]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
