{"id":"https://openalex.org/W7105606782","doi":"https://doi.org/10.1109/access.2025.3632400","title":"TransClean: Finding False Positives in Multi-Source Entity Matching Under Real-World Conditions via Transitive Consistency","display_name":"TransClean: Finding False Positives in Multi-Source Entity Matching Under Real-World Conditions via Transitive Consistency","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W7105606782","doi":"https://doi.org/10.1109/access.2025.3632400"},"language":"en","primary_location":{"id":"doi:10.1109/access.2025.3632400","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2025.3632400","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1109/access.2025.3632400","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Fernando De Meer Pardo","orcid":"https://orcid.org/0000-0003-2233-1858"},"institutions":[{"id":"https://openalex.org/I858936495","display_name":"ZHAW Zurich University of Applied Sciences","ror":"https://ror.org/05pmsvm27","country_code":"CH","type":"education","lineage":["https://openalex.org/I858936495"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Fernando De Meer Pardo","raw_affiliation_strings":["ZHAW Zurich University of Applied Sciences, Winterthur, Switzerland"],"raw_orcid":"https://orcid.org/0000-0003-2233-1858","affiliations":[{"raw_affiliation_string":"ZHAW Zurich University of Applied Sciences, Winterthur, Switzerland","institution_ids":["https://openalex.org/I858936495"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Branka Hadji Misheva","orcid":null},"institutions":[{"id":"https://openalex.org/I130692619","display_name":"Bern University of Applied Sciences","ror":"https://ror.org/02bnkt322","country_code":"CH","type":"education","lineage":["https://openalex.org/I130692619"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Branka Hadji Misheva","raw_affiliation_strings":["Bern University of Applied Sciences, Bern, Switzerland"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Bern University of Applied Sciences, Bern, Switzerland","institution_ids":["https://openalex.org/I130692619"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Martin Braschler","orcid":null},"institutions":[{"id":"https://openalex.org/I858936495","display_name":"ZHAW Zurich University of Applied Sciences","ror":"https://ror.org/05pmsvm27","country_code":"CH","type":"education","lineage":["https://openalex.org/I858936495"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Martin Braschler","raw_affiliation_strings":["ZHAW Zurich University of Applied Sciences, Winterthur, Switzerland"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"ZHAW Zurich University of Applied Sciences, Winterthur, Switzerland","institution_ids":["https://openalex.org/I858936495"]}]},{"author_position":"last","author":{"id":null,"display_name":"Kurt Stockinger","orcid":null},"institutions":[{"id":"https://openalex.org/I858936495","display_name":"ZHAW Zurich University of Applied Sciences","ror":"https://ror.org/05pmsvm27","country_code":"CH","type":"education","lineage":["https://openalex.org/I858936495"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Kurt Stockinger","raw_affiliation_strings":["ZHAW Zurich University of Applied Sciences, Winterthur, Switzerland"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"ZHAW Zurich University of Applied Sciences, Winterthur, Switzerland","institution_ids":["https://openalex.org/I858936495"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.58071033,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"13","issue":null,"first_page":"195856","last_page":"195870"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.0010999999940395355,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.0005000000237487257,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pairwise-comparison","display_name":"Pairwise comparison","score":0.8417999744415283},{"id":"https://openalex.org/keywords/false-positive-paradox","display_name":"False positive paradox","score":0.7721999883651733},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.6826000213623047},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.6140999794006348},{"id":"https://openalex.org/keywords/false-positives-and-false-negatives","display_name":"False positives and false negatives","score":0.5221999883651733},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.4724000096321106},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4537000060081482},{"id":"https://openalex.org/keywords/transitive-relation","display_name":"Transitive relation","score":0.45210000872612}],"concepts":[{"id":"https://openalex.org/C184898388","wikidata":"https://www.wikidata.org/wiki/Q1435712","display_name":"Pairwise comparison","level":2,"score":0.8417999744415283},{"id":"https://openalex.org/C64869954","wikidata":"https://www.wikidata.org/wiki/Q1859747","display_name":"False positive paradox","level":2,"score":0.7721999883651733},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.6826000213623047},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6549000144004822},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.6140999794006348},{"id":"https://openalex.org/C112789634","wikidata":"https://www.wikidata.org/wiki/Q18207010","display_name":"False positives and false negatives","level":3,"score":0.5221999883651733},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48570001125335693},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.4724000096321106},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4537000060081482},{"id":"https://openalex.org/C191399111","wikidata":"https://www.wikidata.org/wiki/Q64861","display_name":"Transitive relation","level":2,"score":0.45210000872612},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.4296000003814697},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4277999997138977},{"id":"https://openalex.org/C136643341","wikidata":"https://www.wikidata.org/wiki/Q1361526","display_name":"Reachability","level":2,"score":0.3513999879360199},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.34060001373291016},{"id":"https://openalex.org/C61455927","wikidata":"https://www.wikidata.org/wiki/Q1030529","display_name":"Blossom algorithm","level":3,"score":0.33869999647140503},{"id":"https://openalex.org/C128896722","wikidata":"https://www.wikidata.org/wiki/Q1501387","display_name":"Transitive closure","level":2,"score":0.30630001425743103},{"id":"https://openalex.org/C2989486834","wikidata":"https://www.wikidata.org/wiki/Q3808900","display_name":"True positive rate","level":2,"score":0.29840001463890076},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2973000109195709},{"id":"https://openalex.org/C162307627","wikidata":"https://www.wikidata.org/wiki/Q204833","display_name":"Enhanced Data Rates for GSM Evolution","level":2,"score":0.29249998927116394},{"id":"https://openalex.org/C175291020","wikidata":"https://www.wikidata.org/wiki/Q1156822","display_name":"Offset (computer science)","level":2,"score":0.29030001163482666},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.2888999879360199},{"id":"https://openalex.org/C24756922","wikidata":"https://www.wikidata.org/wiki/Q1757694","display_name":"Data quality","level":3,"score":0.2680000066757202},{"id":"https://openalex.org/C2778029271","wikidata":"https://www.wikidata.org/wiki/Q5421931","display_name":"Extension (predicate logic)","level":2,"score":0.25130000710487366}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/access.2025.3632400","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2025.3632400","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},{"id":"pmh:doi:10.24451/arbor.12416","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Text"},{"id":"pmh:oai:digitalcollection.zhaw.ch:11475/34710","is_oa":true,"landing_page_url":"https://hdl.handle.net/11475/34710","pdf_url":null,"source":{"id":"https://openalex.org/S4306401810","display_name":"Z\u00fcrcher Hochschule f\u00fcr Angewandte Wissenschaften digital collection (Zurich University of Applied Sciences)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I200744771","host_organization_name":"ZHAW Zurich University of Applied Sciences","host_organization_lineage":["https://openalex.org/I200744771"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Text"},{"id":"pmh:oai:doaj.org/article:5469c10deb61400eb4ee9521a5a413ce","is_oa":true,"landing_page_url":"https://doaj.org/article/5469c10deb61400eb4ee9521a5a413ce","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Access, Vol 13, Pp 195856-195870 (2025)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1109/access.2025.3632400","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2025.3632400","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2520294667","display_name":null,"funder_award_id":"54383.1IP-ICT","funder_id":"https://openalex.org/F4320327931","funder_display_name":"Innosuisse - Schweizerische Agentur f\u00fcr Innovationsf\u00f6rderung"}],"funders":[{"id":"https://openalex.org/F4320327931","display_name":"Innosuisse - Schweizerische Agentur f\u00fcr Innovationsf\u00f6rderung","ror":"https://ror.org/05a2bhn71"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":43,"referenced_works":["https://openalex.org/W1981590391","https://openalex.org/W2073471108","https://openalex.org/W2087183379","https://openalex.org/W2108087318","https://openalex.org/W2148019918","https://openalex.org/W2148524305","https://openalex.org/W2542998387","https://openalex.org/W2584608588","https://openalex.org/W2748156246","https://openalex.org/W2775160451","https://openalex.org/W2790120012","https://openalex.org/W2805602976","https://openalex.org/W2886633187","https://openalex.org/W2900130171","https://openalex.org/W2946504770","https://openalex.org/W2951147191","https://openalex.org/W2953502323","https://openalex.org/W2963753024","https://openalex.org/W2966720878","https://openalex.org/W2997169693","https://openalex.org/W3011807731","https://openalex.org/W3013103751","https://openalex.org/W3029269967","https://openalex.org/W3034997167","https://openalex.org/W3041011740","https://openalex.org/W3092962901","https://openalex.org/W3093896808","https://openalex.org/W3119752913","https://openalex.org/W3174036215","https://openalex.org/W3197468999","https://openalex.org/W3202190154","https://openalex.org/W4205196528","https://openalex.org/W4221163653","https://openalex.org/W4234053595","https://openalex.org/W4281721601","https://openalex.org/W4283312893","https://openalex.org/W4312634207","https://openalex.org/W4317039121","https://openalex.org/W4321448364","https://openalex.org/W4389116251","https://openalex.org/W4391054937","https://openalex.org/W4407182887","https://openalex.org/W4407355477"],"related_works":[],"abstract_inverted_index":{"We":[0],"present":[1],"TransClean,":[2],"a":[3,63,76,136,228,237,263,283],"method":[4,241],"for":[5,48,279],"detecting":[6],"false":[7,141,187,218,257],"positive":[8,142,149],"predictions":[9,109],"of":[10,73,75,110,119,155,160,181,184,202,205,247,255,259,265],"entity":[11,280],"matching":[12,78,137,191,207,232,240,281,291],"algorithms":[13],"under":[14],"real-world":[15],"conditions":[16],"characterized":[17],"by":[18,94],"large-scale,":[19],"noisy,":[20],"and":[21,43,51,172,186,208,235,243],"unlabeled":[22],"multi-source":[23,284],"datasets":[24],"that":[25,65,175,270],"undergo":[26],"distributional":[27],"shifts.":[28],"TransClean":[29,56,133,226,248,271],"is":[30,104,166],"explicitly":[31],"designed":[32],"to":[33,69,122,216,252,288],"operate":[34],"with":[35,227,236],"multiple":[36],"data":[37],"sources":[38],"in":[39,127,189,249,282],"an":[40,273],"efficient,":[41],"robust":[42],"fast":[44],"manner":[45],"while":[46,144,192],"accounting":[47],"edge":[49],"cases":[50],"requiring":[52,194],"limited":[53],"manual":[54,196],"labeling.":[55,197],"leverages":[57],"the":[58,71,86,108,117,123,158,161,182,190,203,206,245,256],"<italic":[59,80,88,100,111,128,162],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[60,81,83,89,91,101,112,114,129,131,163],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">Transitive":[61,102,164],"Consistency</i>,":[62],"measure":[64],"we":[66,223],"propose,":[67],"aimed":[68],"detect":[70,253],"lack":[72],"consistency":[74],"pairwise":[77,97,231],"model":[79,170,233],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">f</i><sub":[82,113],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">\u03b8</sub>":[84,115],"on":[85,116],"graph":[87],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">G</i><sub":[90,130],"xmlns:xlink=\"http://www.w3.org/1999/xlink\"><i>f</i>\u03b8</sub>":[92],"implied":[93],"its":[95],"predicted":[96],"matches.":[98],"The":[99],"Consistency</i>":[103,165],"calculated":[105],"via":[106],"all":[107],"pairs":[118],"records":[120],"belonging":[121],"same":[124],"connected":[125],"components":[126],"xmlns:xlink=\"http://www.w3.org/1999/xlink\"><i>f</i>\u03b8</sub>.":[132],"iteratively":[134],"modifies":[135],"through":[138,169],"gradually":[139],"removing":[140,145],"matches":[143,150],"as":[146,151,179],"few":[147],"true":[148,185],"possible.":[152],"In":[153,220],"each":[154],"these":[156],"steps,":[157],"estimation":[159],"exclusively":[167],"done":[168],"inference":[171],"produces":[173],"indicators":[174,199],"can":[176],"be":[177],"used":[178],"proxies":[180],"amounts":[183],"positives":[188,258],"not":[193],"any":[195],"These":[198],"produce":[200],"estimates":[201],"quality":[204],"point":[209],"out":[210],"which":[211],"record":[212],"groups":[213],"are":[214],"likely":[215],"contain":[217],"positives.":[219],"our":[221],"experiments,":[222],"compare":[224],"combining":[225],"naively":[229],"trained":[230],"(DistilBERT)":[234],"state-of-the-art":[238],"end-to-end":[239],"(CLER)":[242],"illustrate":[244],"flexibility":[246],"being":[250],"able":[251],"most":[254],"either":[260],"setup":[261],"across":[262],"variety":[264],"datasets.":[266],"Our":[267],"experiments":[268],"show":[269],"induces":[272],"average":[274],"+24.42":[275],"F1":[276],"score":[277],"improvement":[278],"setting":[285],"when":[286],"compared":[287],"traditional":[289],"pair-wise":[290],"algorithms.":[292]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-11-13T00:00:00"}
