{"id":"https://openalex.org/W7117593042","doi":"https://doi.org/10.1109/iisa66859.2025.11311205","title":"An Experimental Evaluation of Pre-Trained Models for Efficient and Accurate Record Linkage","display_name":"An Experimental Evaluation of Pre-Trained Models for Efficient and Accurate Record Linkage","publication_year":2025,"publication_date":"2025-07-10","ids":{"openalex":"https://openalex.org/W7117593042","doi":"https://doi.org/10.1109/iisa66859.2025.11311205"},"language":null,"primary_location":{"id":"doi:10.1109/iisa66859.2025.11311205","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iisa66859.2025.11311205","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 16th International Conference on Information, Intelligence, Systems &amp;amp; Applications (IISA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5021354735","display_name":"Dimitrios Karapiperis","orcid":"https://orcid.org/0000-0002-3878-5988"},"institutions":[{"id":"https://openalex.org/I183898223","display_name":"International Hellenic University","ror":"https://ror.org/00708jp83","country_code":"GR","type":"education","lineage":["https://openalex.org/I183898223"]}],"countries":["GR"],"is_corresponding":true,"raw_author_name":"Dimitrios Karapiperis","raw_affiliation_strings":["International Hellenic University,Thessaloniki,Greece"],"affiliations":[{"raw_affiliation_string":"International Hellenic University,Thessaloniki,Greece","institution_ids":["https://openalex.org/I183898223"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003484641","display_name":"Georgios Feretzakis","orcid":"https://orcid.org/0000-0002-3597-1187"},"institutions":[{"id":"https://openalex.org/I231025917","display_name":"Hellenic Open University","ror":"https://ror.org/02kq26x23","country_code":"GR","type":"education","lineage":["https://openalex.org/I231025917"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"Georgios Feretzakis","raw_affiliation_strings":["Hellenic Open University,Patras,Greece"],"affiliations":[{"raw_affiliation_string":"Hellenic Open University,Patras,Greece","institution_ids":["https://openalex.org/I231025917"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5121539603","display_name":"Vassilios S. Verykios","orcid":null},"institutions":[{"id":"https://openalex.org/I231025917","display_name":"Hellenic Open University","ror":"https://ror.org/02kq26x23","country_code":"GR","type":"education","lineage":["https://openalex.org/I231025917"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"Vassilios S. Verykios","raw_affiliation_strings":["Hellenic Open University,Patras,Greece"],"affiliations":[{"raw_affiliation_string":"Hellenic Open University,Patras,Greece","institution_ids":["https://openalex.org/I231025917"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5021354735"],"corresponding_institution_ids":["https://openalex.org/I183898223"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.74048623,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9911999702453613,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9911999702453613,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.0019000000320374966,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.0010999999940395355,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.557699978351593},{"id":"https://openalex.org/keywords/record-linkage","display_name":"Record linkage","score":0.5184000134468079},{"id":"https://openalex.org/keywords/linkage","display_name":"Linkage (software)","score":0.4603999853134155},{"id":"https://openalex.org/keywords/hash-function","display_name":"Hash function","score":0.4569000005722046},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.4388999938964844},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.43529999256134033},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.4075999855995178},{"id":"https://openalex.org/keywords/feature-matching","display_name":"Feature matching","score":0.33709999918937683}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7317000031471252},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.559499979019165},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.557699978351593},{"id":"https://openalex.org/C142210648","wikidata":"https://www.wikidata.org/wiki/Q1266546","display_name":"Record linkage","level":3,"score":0.5184000134468079},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.49869999289512634},{"id":"https://openalex.org/C31266012","wikidata":"https://www.wikidata.org/wiki/Q6554340","display_name":"Linkage (software)","level":3,"score":0.4603999853134155},{"id":"https://openalex.org/C99138194","wikidata":"https://www.wikidata.org/wiki/Q183427","display_name":"Hash function","level":2,"score":0.4569000005722046},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.4388999938964844},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.43529999256134033},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.4075999855995178},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3961000144481659},{"id":"https://openalex.org/C2983787585","wikidata":"https://www.wikidata.org/wiki/Q93586","display_name":"Feature matching","level":3,"score":0.33709999918937683},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.33480000495910645},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.3174000084400177},{"id":"https://openalex.org/C2780762811","wikidata":"https://www.wikidata.org/wiki/Q1784941","display_name":"Cosine similarity","level":3,"score":0.3149999976158142},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.298799991607666},{"id":"https://openalex.org/C68859911","wikidata":"https://www.wikidata.org/wiki/Q1503724","display_name":"Pattern matching","level":2,"score":0.2948000133037567},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.28630000352859497},{"id":"https://openalex.org/C2781170535","wikidata":"https://www.wikidata.org/wiki/Q30587856","display_name":"Noisy data","level":2,"score":0.2842000126838684},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.2827000021934509},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.28029999136924744},{"id":"https://openalex.org/C3020493868","wikidata":"https://www.wikidata.org/wiki/Q55631277","display_name":"Real world data","level":2,"score":0.25619998574256897}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iisa66859.2025.11311205","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iisa66859.2025.11311205","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 16th International Conference on Information, Intelligence, Systems &amp;amp; Applications (IISA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":5,"referenced_works":["https://openalex.org/W1992930793","https://openalex.org/W2012833704","https://openalex.org/W2250539671","https://openalex.org/W2970641574","https://openalex.org/W4385573970"],"related_works":[],"abstract_inverted_index":{"Record":[0],"linkage":[1,109],"in":[2,11],"noisy":[3],"and":[4,28,89],"heterogeneous":[5],"datasets":[6,71],"remains":[7],"a":[8],"persistent":[9],"challenge":[10],"data":[12],"integration.":[13],"In":[14],"this":[15],"work,":[16],"we":[17],"investigate":[18],"the":[19,30,33,48,74,82,95],"impact":[20],"of":[21,32,51,97],"various":[22],"pre-trained":[23],"models-including":[24],"GloVe,":[25],"DistilBERT,":[26],"S-GTR-T5,":[27],"all-MiniLM-L6-v2-on":[29],"effectiveness":[31],"Cosine":[34],"LSH":[35],"blocking-matching":[36],"pipeline.":[37],"We":[38],"show":[39],"that":[40,73],"embedding":[41],"records":[42],"via":[43],"transformerbased":[44],"models":[45,56],"significantly":[46],"enhances":[47],"semantic":[49],"matching":[50],"entity":[52],"pairs,":[53],"especially":[54],"when":[55],"are":[57],"fine-tuned":[58],"using":[59],"contrastive":[60],"learning":[61],"on":[62],"labeled":[63],"match/non-match":[64],"pairs.":[65],"Extensive":[66],"experiments":[67],"across":[68],"four":[69],"benchmark":[70],"demonstrate":[72],"all-MiniLM-L6-v2":[75],"model":[76],"consistently":[77],"outperforms":[78],"all":[79],"others,":[80],"achieving":[81],"highest":[83],"F1-scores":[84],"while":[85],"maintaining":[86],"fast":[87],"vectorization":[88],"robust":[90],"recall.":[91],"These":[92],"results":[93],"highlight":[94],"utility":[96],"combining":[98],"lightweight":[99],"transformer":[100],"encoders":[101],"with":[102],"efficient":[103],"hashing":[104],"mechanisms":[105],"for":[106],"large-scale":[107],"record":[108],"tasks.":[110]},"counts_by_year":[],"updated_date":"2025-12-31T23:11:33.660297","created_date":"2025-12-30T00:00:00"}
