{"id":"https://openalex.org/W4416390560","doi":"https://doi.org/10.3390/a18110723","title":"Efficient Record Linkage in the Age of Large Language Models: The Critical Role of Blocking","display_name":"Efficient Record Linkage in the Age of Large Language Models: The Critical Role of Blocking","publication_year":2025,"publication_date":"2025-11-16","ids":{"openalex":"https://openalex.org/W4416390560","doi":"https://doi.org/10.3390/a18110723"},"language":"en","primary_location":{"id":"doi:10.3390/a18110723","is_oa":true,"landing_page_url":"https://doi.org/10.3390/a18110723","pdf_url":"https://www.mdpi.com/1999-4893/18/11/723/pdf?version=1763531958","source":{"id":"https://openalex.org/S190629608","display_name":"Algorithms","issn_l":"1999-4893","issn":["1999-4893"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Algorithms","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/1999-4893/18/11/723/pdf?version=1763531958","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102561590","display_name":"Nidhibahen Shah","orcid":null},"institutions":[{"id":"https://openalex.org/I140172145","display_name":"University of Connecticut","ror":"https://ror.org/02der9h97","country_code":"US","type":"education","lineage":["https://openalex.org/I140172145"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nidhibahen Shah","raw_affiliation_strings":["School of Computing, University of Connecticut, 371 Fairfield Way, Storrs, CT 06269, USA"],"affiliations":[{"raw_affiliation_string":"School of Computing, University of Connecticut, 371 Fairfield Way, Storrs, CT 06269, USA","institution_ids":["https://openalex.org/I140172145"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120657089","display_name":"Sreevar Patiyara","orcid":null},"institutions":[{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sreevar Patiyara","raw_affiliation_strings":["CS Department, Purdue University, 305 N. University St., West Lafayette, IN 47907, USA"],"affiliations":[{"raw_affiliation_string":"CS Department, Purdue University, 305 N. University St., West Lafayette, IN 47907, USA","institution_ids":["https://openalex.org/I219193219"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104232356","display_name":"Joyanta Basak","orcid":null},"institutions":[{"id":"https://openalex.org/I140172145","display_name":"University of Connecticut","ror":"https://ror.org/02der9h97","country_code":"US","type":"education","lineage":["https://openalex.org/I140172145"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Joyanta Basak","raw_affiliation_strings":["School of Computing, University of Connecticut, 371 Fairfield Way, Storrs, CT 06269, USA"],"affiliations":[{"raw_affiliation_string":"School of Computing, University of Connecticut, 371 Fairfield Way, Storrs, CT 06269, USA","institution_ids":["https://openalex.org/I140172145"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070824596","display_name":"Sartaj Sahni","orcid":"https://orcid.org/0000-0002-8129-1676"},"institutions":[{"id":"https://openalex.org/I33213144","display_name":"University of Florida","ror":"https://ror.org/02y3ad647","country_code":"US","type":"education","lineage":["https://openalex.org/I33213144"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sartaj Sahni","raw_affiliation_strings":["CISE Department, University of Florida, Gainesville, FL 32611, USA"],"affiliations":[{"raw_affiliation_string":"CISE Department, University of Florida, Gainesville, FL 32611, USA","institution_ids":["https://openalex.org/I33213144"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015264944","display_name":"Anup Mathur","orcid":null},"institutions":[{"id":"https://openalex.org/I1333512998","display_name":"United States Census Bureau","ror":"https://ror.org/01qn7cs15","country_code":"US","type":"government","lineage":["https://openalex.org/I1333512998","https://openalex.org/I1343035065"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Anup Mathur","raw_affiliation_strings":["U.S. Census Bureau, 4600 Silver Hill Road, Washington, DC 20233, USA"],"affiliations":[{"raw_affiliation_string":"U.S. Census Bureau, 4600 Silver Hill Road, Washington, DC 20233, USA","institution_ids":["https://openalex.org/I1333512998"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009015372","display_name":"K. Park","orcid":null},"institutions":[{"id":"https://openalex.org/I1333512998","display_name":"United States Census Bureau","ror":"https://ror.org/01qn7cs15","country_code":"US","type":"government","lineage":["https://openalex.org/I1333512998","https://openalex.org/I1343035065"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Krista Park","raw_affiliation_strings":["U.S. Census Bureau, 4600 Silver Hill Road, Washington, DC 20233, USA"],"affiliations":[{"raw_affiliation_string":"U.S. Census Bureau, 4600 Silver Hill Road, Washington, DC 20233, USA","institution_ids":["https://openalex.org/I1333512998"]}]},{"author_position":"last","author":{"id":null,"display_name":"Sanguthevar Rajasekaran","orcid":null},"institutions":[{"id":"https://openalex.org/I140172145","display_name":"University of Connecticut","ror":"https://ror.org/02der9h97","country_code":"US","type":"education","lineage":["https://openalex.org/I140172145"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Sanguthevar Rajasekaran","raw_affiliation_strings":["School of Computing, University of Connecticut, 371 Fairfield Way, Storrs, CT 06269, USA"],"affiliations":[{"raw_affiliation_string":"School of Computing, University of Connecticut, 371 Fairfield Way, Storrs, CT 06269, USA","institution_ids":["https://openalex.org/I140172145"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I140172145"],"apc_list":{"value":1400,"currency":"CHF","value_usd":1515},"apc_paid":{"value":1400,"currency":"CHF","value_usd":1515},"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.40262715,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"18","issue":"11","first_page":"723","last_page":"723"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9775000214576721,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9775000214576721,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.0034000000450760126,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.002199999988079071,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/blocking","display_name":"Blocking (statistics)","score":0.8259000182151794},{"id":"https://openalex.org/keywords/record-linkage","display_name":"Record linkage","score":0.7921000123023987},{"id":"https://openalex.org/keywords/linkage","display_name":"Linkage (software)","score":0.6988999843597412},{"id":"https://openalex.org/keywords/pairwise-comparison","display_name":"Pairwise comparison","score":0.6211000084877014},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.59579998254776},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5480999946594238}],"concepts":[{"id":"https://openalex.org/C144745244","wikidata":"https://www.wikidata.org/wiki/Q4927286","display_name":"Blocking (statistics)","level":2,"score":0.8259000182151794},{"id":"https://openalex.org/C142210648","wikidata":"https://www.wikidata.org/wiki/Q1266546","display_name":"Record linkage","level":3,"score":0.7921000123023987},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7717999815940857},{"id":"https://openalex.org/C31266012","wikidata":"https://www.wikidata.org/wiki/Q6554340","display_name":"Linkage (software)","level":3,"score":0.6988999843597412},{"id":"https://openalex.org/C184898388","wikidata":"https://www.wikidata.org/wiki/Q1435712","display_name":"Pairwise comparison","level":2,"score":0.6211000084877014},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.59579998254776},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5480999946594238},{"id":"https://openalex.org/C164866538","wikidata":"https://www.wikidata.org/wiki/Q367351","display_name":"Cluster (spacecraft)","level":2,"score":0.4984999895095825},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.35429999232292175},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3361000120639801},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.32829999923706055},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.32170000672340393},{"id":"https://openalex.org/C95713431","wikidata":"https://www.wikidata.org/wiki/Q631425","display_name":"Vulnerability (computing)","level":2,"score":0.2727999985218048},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2606000006198883}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.3390/a18110723","is_oa":true,"landing_page_url":"https://doi.org/10.3390/a18110723","pdf_url":"https://www.mdpi.com/1999-4893/18/11/723/pdf?version=1763531958","source":{"id":"https://openalex.org/S190629608","display_name":"Algorithms","issn_l":"1999-4893","issn":["1999-4893"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Algorithms","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:d8e03a53f9f34b079423b289cb7c3b6f","is_oa":true,"landing_page_url":"https://doaj.org/article/d8e03a53f9f34b079423b289cb7c3b6f","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Algorithms, Vol 18, Iss 11, p 723 (2025)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.3390/a18110723","is_oa":true,"landing_page_url":"https://doi.org/10.3390/a18110723","pdf_url":"https://www.mdpi.com/1999-4893/18/11/723/pdf?version=1763531958","source":{"id":"https://openalex.org/S190629608","display_name":"Algorithms","issn_l":"1999-4893","issn":["1999-4893"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Algorithms","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G5163040779","display_name":null,"funder_award_id":"CB21RMD0160003","funder_id":"https://openalex.org/F4320331525","funder_display_name":"U.S. Census Bureau"}],"funders":[{"id":"https://openalex.org/F4320331525","display_name":"U.S. Census Bureau","ror":"https://ror.org/01qn7cs15"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4416390560.pdf","grobid_xml":"https://content.openalex.org/works/W4416390560.grobid-xml"},"referenced_works_count":19,"referenced_works":["https://openalex.org/W1572649082","https://openalex.org/W1832192327","https://openalex.org/W2031250218","https://openalex.org/W2053870252","https://openalex.org/W2073471108","https://openalex.org/W2127787701","https://openalex.org/W2798649495","https://openalex.org/W2889907608","https://openalex.org/W2908287046","https://openalex.org/W3014705052","https://openalex.org/W3138971549","https://openalex.org/W4290717512","https://openalex.org/W4320008793","https://openalex.org/W4365128550","https://openalex.org/W4389285719","https://openalex.org/W4391097886","https://openalex.org/W4406458108","https://openalex.org/W4409663264","https://openalex.org/W4412807058"],"related_works":[],"abstract_inverted_index":{"Record":[0],"linkage":[1,28,89,100,139],"is":[2,29,49],"an":[3],"essential":[4],"task":[5],"in":[6,9,46,102,121,126,131,176],"data":[7,65],"integration":[8],"the":[10,52,71,75,103,115,127,132,162,171,177],"fields":[11],"of":[12,26,55,77,95,117,173,179],"healthcare,":[13],"law":[14],"enforcement,":[15],"fraud":[16],"detection,":[17],"transportation,":[18],"biology,":[19],"and":[20],"supply":[21],"chain":[22],"management.":[23],"The":[24],"problem":[25],"record":[27,47,88,99,138],"to":[30,41,86,161],"cluster":[31,39],"records":[32],"from":[33],"various":[34],"sources":[35],"such":[36],"that":[37,114,141,149],"each":[38],"belongs":[40],"a":[42,137],"single":[43],"entity.":[44],"Scalability":[45],"linking":[48],"limited":[50],"by":[51,63,90],"large":[53,107],"number":[54],"pairwise":[56],"comparisons":[57],"required.":[58],"Blocking":[59],"addresses":[60],"this":[61,110],"challenge":[62],"partitioning":[64],"into":[66],"smaller":[67],"parts,":[68],"substantially":[69],"reducing":[70],"computational":[72],"cost.":[73],"With":[74],"advancement":[76],"Large":[78],"Language":[79],"Models":[80],"(LLMs),":[81],"there":[82],"are":[83],"several":[84],"possibilities":[85],"improve":[87],"leveraging":[91],"their":[92],"semantic":[93],"understanding":[94],"textual":[96],"attributes.":[97],"LLM-based":[98],"algorithms":[101],"literature":[104],"have":[105],"very":[106],"runtimes.":[108],"In":[109],"paper,":[111],"we":[112,135],"show":[113],"employment":[116],"blocking":[118,174],"can":[119],"result":[120],"significant":[122],"improvements":[123],"not":[124],"only":[125],"runtime":[128],"but":[129],"also":[130],"accuracy.":[133],"Specifically,":[134],"propose":[136],"algorithm":[140,151],"combines":[142],"LLMs":[143],"with":[144],"blocking.":[145],"Experimental":[146],"evaluation":[147],"demonstrates":[148],"our":[150],"achieves":[152],"lower":[153],"runtimes":[154],"while":[155],"simultaneously":[156],"improving":[157],"F1":[158],"scores":[159],"compared":[160],"approaches":[163],"relying":[164],"solely":[165],"on":[166],"LLMs.":[167],"These":[168],"findings":[169],"demonstrate":[170],"importance":[172],"even":[175],"era":[178],"advanced":[180],"machine":[181],"learning":[182],"models.":[183]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-11-19T00:00:00"}
