{"id":"https://openalex.org/W4407355477","doi":"https://doi.org/10.1145/3709715","title":"Progressive Entity Matching: A Design Space Exploration","display_name":"Progressive Entity Matching: A Design Space Exploration","publication_year":2025,"publication_date":"2025-02-10","ids":{"openalex":"https://openalex.org/W4407355477","doi":"https://doi.org/10.1145/3709715"},"language":"en","primary_location":{"id":"doi:10.1145/3709715","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3709715","pdf_url":null,"source":{"id":"https://openalex.org/S4387289859","display_name":"Proceedings of the ACM on Management of Data","issn_l":"2836-6573","issn":["2836-6573"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Management of Data","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.1145/3709715","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5055457607","display_name":"Jakub Maciejewski","orcid":"https://orcid.org/0009-0005-8307-8843"},"institutions":[{"id":"https://openalex.org/I200777214","display_name":"National and Kapodistrian University of Athens","ror":"https://ror.org/04gnjpq42","country_code":"GR","type":"education","lineage":["https://openalex.org/I200777214"]}],"countries":["GR"],"is_corresponding":true,"raw_author_name":"Jakub Maciejewski","raw_affiliation_strings":["National and Kapodistrian University of Athens, Athens, Greece"],"affiliations":[{"raw_affiliation_string":"National and Kapodistrian University of Athens, Athens, Greece","institution_ids":["https://openalex.org/I200777214"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5093760528","display_name":"Konstantinos Nikoletos","orcid":"https://orcid.org/0000-0003-3465-1197"},"institutions":[{"id":"https://openalex.org/I200777214","display_name":"National and Kapodistrian University of Athens","ror":"https://ror.org/04gnjpq42","country_code":"GR","type":"education","lineage":["https://openalex.org/I200777214"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"Konstantinos Nikoletos","raw_affiliation_strings":["National and Kapodistrian University of Athens, Athens, Greece"],"affiliations":[{"raw_affiliation_string":"National and Kapodistrian University of Athens, Athens, Greece","institution_ids":["https://openalex.org/I200777214"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056976720","display_name":"George Papadakis","orcid":"https://orcid.org/0000-0002-7298-9431"},"institutions":[{"id":"https://openalex.org/I200777214","display_name":"National and Kapodistrian University of Athens","ror":"https://ror.org/04gnjpq42","country_code":"GR","type":"education","lineage":["https://openalex.org/I200777214"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"George Papadakis","raw_affiliation_strings":["National and Kapodistrian University of Athens, Athens, Greece"],"affiliations":[{"raw_affiliation_string":"National and Kapodistrian University of Athens, Athens, Greece","institution_ids":["https://openalex.org/I200777214"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5046955405","display_name":"Yannis Velegrakis","orcid":"https://orcid.org/0000-0001-6332-0296"},"institutions":[{"id":"https://openalex.org/I193223587","display_name":"University of Trento","ror":"https://ror.org/05trd4x28","country_code":"IT","type":"education","lineage":["https://openalex.org/I193223587"]},{"id":"https://openalex.org/I193662353","display_name":"Utrecht University","ror":"https://ror.org/04pp8hn57","country_code":"NL","type":"education","lineage":["https://openalex.org/I193662353"]}],"countries":["IT","NL"],"is_corresponding":false,"raw_author_name":"Yannis Velegrakis","raw_affiliation_strings":["University of Trento and Utrecht University, Utrecht, Netherlands"],"affiliations":[{"raw_affiliation_string":"University of Trento and Utrecht University, Utrecht, Netherlands","institution_ids":["https://openalex.org/I193662353","https://openalex.org/I193223587"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5055457607"],"corresponding_institution_ids":["https://openalex.org/I200777214"],"apc_list":null,"apc_paid":null,"fwci":13.2131,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.98407343,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":"3","issue":"1","first_page":"1","last_page":"25"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.9797000288963318,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9470000267028809,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.5643547773361206},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.5421169996261597},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4934317171573639},{"id":"https://openalex.org/keywords/geology","display_name":"Geology","score":0.35412657260894775},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.18963554501533508}],"concepts":[{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.5643547773361206},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.5421169996261597},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4934317171573639},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.35412657260894775},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.18963554501533508},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3709715","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3709715","pdf_url":null,"source":{"id":"https://openalex.org/S4387289859","display_name":"Proceedings of the ACM on Management of Data","issn_l":"2836-6573","issn":["2836-6573"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Management of Data","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:2503.08298","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2503.08298","pdf_url":"https://arxiv.org/pdf/2503.08298","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"doi:10.1145/3709715","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3709715","pdf_url":null,"source":{"id":"https://openalex.org/S4387289859","display_name":"Proceedings of the ACM on Management of Data","issn_l":"2836-6573","issn":["2836-6573"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Management of Data","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":41,"referenced_works":["https://openalex.org/W69311973","https://openalex.org/W87992039","https://openalex.org/W1155226818","https://openalex.org/W1981590391","https://openalex.org/W2017167478","https://openalex.org/W2024386211","https://openalex.org/W2067566391","https://openalex.org/W2106675345","https://openalex.org/W2108087318","https://openalex.org/W2148524305","https://openalex.org/W2168440643","https://openalex.org/W2399361902","https://openalex.org/W2798649495","https://openalex.org/W2900130171","https://openalex.org/W2946741276","https://openalex.org/W2949985202","https://openalex.org/W2967709572","https://openalex.org/W2981852735","https://openalex.org/W2998702515","https://openalex.org/W3013103751","https://openalex.org/W3014295153","https://openalex.org/W3029269967","https://openalex.org/W3032015135","https://openalex.org/W3092962901","https://openalex.org/W3105771849","https://openalex.org/W3137039868","https://openalex.org/W3138971549","https://openalex.org/W3155747247","https://openalex.org/W3174544005","https://openalex.org/W3197468999","https://openalex.org/W4206595948","https://openalex.org/W4283312893","https://openalex.org/W4288089799","https://openalex.org/W4366290747","https://openalex.org/W4366729173","https://openalex.org/W4383051975","https://openalex.org/W4385270265","https://openalex.org/W4388348930","https://openalex.org/W4389116251","https://openalex.org/W4400909574","https://openalex.org/W6660884492"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2324615561","https://openalex.org/W2086120259","https://openalex.org/W2390279801","https://openalex.org/W2245170124","https://openalex.org/W2076393078","https://openalex.org/W4391913857","https://openalex.org/W2358668433"],"abstract_inverted_index":{"Entity":[0,64],"Resolution":[1],"(ER)":[2],"is":[3],"typically":[4],"implemented":[5],"as":[6],"a":[7,33,40,59,97,124,154,196],"batch":[8],"task":[9],"that":[10,36,66,111,149,192],"processes":[11],"all":[12],"available":[13],"data":[14],"before":[15],"identifying":[16],"duplicate":[17],"records.":[18],"However,":[19],"applications":[20],"with":[21,96,143,188],"time":[22,210],"or":[23],"computational":[24],"constraints,":[25],"e.g.,":[26],"those":[27],"running":[28],"in":[29,39,51,131,161,205],"the":[30,52,78,82,104,107,112,116,129,132,136,157,162,166],"cloud,":[31],"require":[32],"progressive":[34,202],"approach":[35],"produces":[37],"results":[38,190],"pay-as-you-go":[41],"fashion.":[42],"Numerous":[43],"algorithms":[44],"have":[45],"been":[46],"proposed":[47],"for":[48,62,181,186],"Progressive":[49,63],"ER":[50],"literature.":[53],"In":[54],"this":[55],"work,":[56],"we":[57],"propose":[58],"novel":[60,146],"framework":[61,151,173],"Matching":[65],"organizes":[67],"relevant":[68],"techniques":[69,203],"into":[70],"four":[71],"consecutive":[72],"steps:":[73],"(i)":[74],"filtering,":[75],"which":[76,89,102,122],"reduces":[77],"search":[79],"space":[80],"to":[81,128],"most":[83,167],"likely":[84],"candidate":[85,94,108],"matches,":[86],"(ii)":[87],"weighting,":[88],"associates":[90],"every":[91],"pair":[92],"of":[93,106,156,199,207],"matches":[95,109],"similarity":[98],"score,":[99],"(iii)":[100],"scheduling,":[101],"prioritizes":[103],"execution":[105],"so":[110],"real":[113],"duplicates":[114],"precede":[115],"non-matching":[117],"pairs,":[118],"and":[119,145,174,184,209],"(iv)":[120],"matching,":[121],"applies":[123],"complex,":[125],"matching":[126],"function":[127],"pairs":[130],"order":[133],"defined":[134],"by":[135],"previous":[137],"step.":[138],"We":[139,164],"associate":[140],"each":[141],"step":[142],"existing":[144,159],"techniques,":[147],"illustrating":[148],"our":[150,172,189,193],"overall":[152],"generates":[153],"superset":[155],"main":[158],"works":[160],"field.":[163],"select":[165],"representative":[168],"combinations":[169],"resulting":[170],"from":[171],"fine-tune":[175],"them":[176],"over":[177],"10":[178],"established":[179],"datasets":[180],"Record":[182],"Linkage":[183],"8":[185],"Deduplication,":[187],"indicating":[191],"taxonomy":[194],"yields":[195],"wide":[197],"range":[198],"high":[200],"performing":[201],"both":[204],"terms":[206],"effectiveness":[208],"efficiency.":[211]},"counts_by_year":[{"year":2025,"cited_by_count":6}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-02-11T00:00:00"}
