{"id":"https://openalex.org/W4407986893","doi":"https://doi.org/10.1007/s10579-025-09812-9","title":"Utilizing phonetic similarity for cross-source and cross-language toponym matching: a benchmark and prototype","display_name":"Utilizing phonetic similarity for cross-source and cross-language toponym matching: a benchmark and prototype","publication_year":2025,"publication_date":"2025-02-26","ids":{"openalex":"https://openalex.org/W4407986893","doi":"https://doi.org/10.1007/s10579-025-09812-9"},"language":"en","primary_location":{"id":"doi:10.1007/s10579-025-09812-9","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10579-025-09812-9","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10579-025-09812-9.pdf","source":{"id":"https://openalex.org/S4306424877","display_name":"Language Resources and Evaluation","issn_l":"1574-020X","issn":["1574-020X","1574-0218"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Language Resources and Evaluation","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/s10579-025-09812-9.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5090636024","display_name":"Tomer Sagi","orcid":"https://orcid.org/0000-0002-8916-0128"},"institutions":[{"id":"https://openalex.org/I891191580","display_name":"Aalborg University","ror":"https://ror.org/04m5j1k67","country_code":"DK","type":"education","lineage":["https://openalex.org/I891191580"]}],"countries":["DK"],"is_corresponding":true,"raw_author_name":"Tomer Sagi","raw_affiliation_strings":["Department of Computer Science, Aalborg University, Aalborg, Denmark"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Aalborg University, Aalborg, Denmark","institution_ids":["https://openalex.org/I891191580"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057361055","display_name":"Moran Zaga","orcid":"https://orcid.org/0000-0002-2197-116X"},"institutions":[{"id":"https://openalex.org/I91203450","display_name":"University of Haifa","ror":"https://ror.org/02f009v59","country_code":"IL","type":"education","lineage":["https://openalex.org/I91203450"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Moran Zaga","raw_affiliation_strings":["e-Lijah Lab, University of Haifa, Haifa, Israel"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"e-Lijah Lab, University of Haifa, Haifa, Israel","institution_ids":["https://openalex.org/I91203450"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018370515","display_name":"Sinai Rusinek","orcid":null},"institutions":[{"id":"https://openalex.org/I91203450","display_name":"University of Haifa","ror":"https://ror.org/02f009v59","country_code":"IL","type":"education","lineage":["https://openalex.org/I91203450"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Sinai Rusinek","raw_affiliation_strings":["e-Lijah Lab, University of Haifa, Haifa, Israel"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"e-Lijah Lab, University of Haifa, Haifa, Israel","institution_ids":["https://openalex.org/I91203450"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5092831395","display_name":"Marcell Fekete","orcid":"https://orcid.org/0009-0007-5025-7866"},"institutions":[{"id":"https://openalex.org/I891191580","display_name":"Aalborg University","ror":"https://ror.org/04m5j1k67","country_code":"DK","type":"education","lineage":["https://openalex.org/I891191580"]}],"countries":["DK"],"is_corresponding":false,"raw_author_name":"Marcell R. Fekete","raw_affiliation_strings":["Department of Computer Science, Aalborg University, Aalborg, Denmark"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Aalborg University, Aalborg, Denmark","institution_ids":["https://openalex.org/I891191580"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013472329","display_name":"Johannes Bjerva","orcid":"https://orcid.org/0000-0002-9512-0739"},"institutions":[{"id":"https://openalex.org/I891191580","display_name":"Aalborg University","ror":"https://ror.org/04m5j1k67","country_code":"DK","type":"education","lineage":["https://openalex.org/I891191580"]}],"countries":["DK"],"is_corresponding":false,"raw_author_name":"Johannes Bjerva","raw_affiliation_strings":["Department of Computer Science, Aalborg University, Aalborg, Denmark"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Aalborg University, Aalborg, Denmark","institution_ids":["https://openalex.org/I891191580"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5015313855","display_name":"Katja Hose","orcid":"https://orcid.org/0000-0001-7025-8099"},"institutions":[{"id":"https://openalex.org/I145847075","display_name":"TU Wien","ror":"https://ror.org/04d836q62","country_code":"AT","type":"education","lineage":["https://openalex.org/I145847075"]},{"id":"https://openalex.org/I891191580","display_name":"Aalborg University","ror":"https://ror.org/04m5j1k67","country_code":"DK","type":"education","lineage":["https://openalex.org/I891191580"]}],"countries":["AT","DK"],"is_corresponding":false,"raw_author_name":"Katja Hose","raw_affiliation_strings":["Department of Computer Science, Aalborg University, Aalborg, Denmark","Institute of Logic and Computation, TU Wien, Vienna, Austria"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Aalborg University, Aalborg, Denmark","institution_ids":["https://openalex.org/I891191580"]},{"raw_affiliation_string":"Institute of Logic and Computation, TU Wien, Vienna, Austria","institution_ids":["https://openalex.org/I145847075"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5090636024"],"corresponding_institution_ids":["https://openalex.org/I891191580"],"apc_list":null,"apc_paid":null,"fwci":1.5233,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.8023069,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"59","issue":"3","first_page":"2427","last_page":"2451"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10757","display_name":"Geographic Information Systems Studies","score":0.9916999936103821,"subfield":{"id":"https://openalex.org/subfields/3305","display_name":"Geography, Planning and Development"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9916999936103821,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.7996382713317871},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7190993428230286},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.711113452911377},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.699901819229126},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5480579733848572},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4620456397533417},{"id":"https://openalex.org/keywords/cross-correlation","display_name":"Cross-correlation","score":0.4163692593574524},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.32776641845703125},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.13033157587051392},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.11466369032859802},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.11159268021583557},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.08578649163246155},{"id":"https://openalex.org/keywords/cartography","display_name":"Cartography","score":0.05563667416572571}],"concepts":[{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.7996382713317871},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7190993428230286},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.711113452911377},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.699901819229126},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5480579733848572},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4620456397533417},{"id":"https://openalex.org/C163018871","wikidata":"https://www.wikidata.org/wiki/Q1302587","display_name":"Cross-correlation","level":2,"score":0.4163692593574524},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.32776641845703125},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.13033157587051392},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.11466369032859802},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.11159268021583557},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.08578649163246155},{"id":"https://openalex.org/C58640448","wikidata":"https://www.wikidata.org/wiki/Q42515","display_name":"Cartography","level":1,"score":0.05563667416572571}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1007/s10579-025-09812-9","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10579-025-09812-9","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10579-025-09812-9.pdf","source":{"id":"https://openalex.org/S4306424877","display_name":"Language Resources and Evaluation","issn_l":"1574-020X","issn":["1574-020X","1574-0218"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Language Resources and Evaluation","raw_type":"journal-article"},{"id":"pmh:oai:pure.atira.dk:openaire/57c4e611-5803-45b2-b99c-cad9a332b278","is_oa":true,"landing_page_url":"https://vbn.aau.dk/da/publications/57c4e611-5803-45b2-b99c-cad9a332b278","pdf_url":null,"source":{"id":"https://openalex.org/S4306401731","display_name":"VBN Forskningsportal (Aalborg Universitet)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I891191580","host_organization_name":"Aalborg University","host_organization_lineage":["https://openalex.org/I891191580"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Sagi, T, Zaga, M, Rusinek, S, Fekete, M, Bjerva, J & Hose, K 2025, 'Utilizing phonetic similarity for cross-source and cross-language toponym matching : a benchmark and prototype', Language Resources and Evaluation, vol. 59, no. 3, pp. 2427-2451. https://doi.org/10.1007/s10579-025-09812-9","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":{"id":"doi:10.1007/s10579-025-09812-9","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10579-025-09812-9","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10579-025-09812-9.pdf","source":{"id":"https://openalex.org/S4306424877","display_name":"Language Resources and Evaluation","issn_l":"1574-020X","issn":["1574-020X","1574-0218"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Language Resources and Evaluation","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2283197534","display_name":null,"funder_award_id":"Semper Ardens: Accelerate programme (project nr. CF21-0454)","funder_id":"https://openalex.org/F4320321504","funder_display_name":"Carlsbergfondet"},{"id":"https://openalex.org/G7116332196","display_name":null,"funder_award_id":"3-17937","funder_id":"https://openalex.org/F4320323706","funder_display_name":"Ministry of Science and Technology, Israel"}],"funders":[{"id":"https://openalex.org/F4320321409","display_name":"Aalborg Universitet","ror":"https://ror.org/04m5j1k67"},{"id":"https://openalex.org/F4320321504","display_name":"Carlsbergfondet","ror":"https://ror.org/01kpjmx04"},{"id":"https://openalex.org/F4320323706","display_name":"Ministry of Science and Technology, Israel","ror":"https://ror.org/02heb2n75"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4407986893.pdf"},"referenced_works_count":28,"referenced_works":["https://openalex.org/W48945287","https://openalex.org/W1647671624","https://openalex.org/W1854015338","https://openalex.org/W1983053172","https://openalex.org/W1997927541","https://openalex.org/W2017378397","https://openalex.org/W2034190452","https://openalex.org/W2087064593","https://openalex.org/W2119013622","https://openalex.org/W2134150403","https://openalex.org/W2160580257","https://openalex.org/W2262393948","https://openalex.org/W2560939934","https://openalex.org/W2572670101","https://openalex.org/W2736486661","https://openalex.org/W2751985240","https://openalex.org/W2765285316","https://openalex.org/W2767597228","https://openalex.org/W2912429757","https://openalex.org/W2962916648","https://openalex.org/W3014295153","https://openalex.org/W3117053671","https://openalex.org/W3209230196","https://openalex.org/W4210832368","https://openalex.org/W4229548027","https://openalex.org/W4247397059","https://openalex.org/W4287252285","https://openalex.org/W6999775547"],"related_works":["https://openalex.org/W2378211422","https://openalex.org/W4321353415","https://openalex.org/W2745001401","https://openalex.org/W2130974462","https://openalex.org/W2028665553","https://openalex.org/W2086519370","https://openalex.org/W4246352526","https://openalex.org/W2121910908","https://openalex.org/W915438175","https://openalex.org/W2380946851"],"abstract_inverted_index":{"Abstract":[0],"The":[1,68],"writings":[2],"of":[3,16,161,164,175,220],"one":[4,63],"ancient":[5,23],"civilization":[6,64],"often":[7,33,94,104,124],"overlap":[8,117],"in":[9,22,37,118,172],"time":[10],"and":[11,53,65,108,120,148,154,181,205,217,224],"space":[12,119],"with":[13],"others.":[14],"Many":[15],"these":[17,28,176],"sources":[18,36,82,115,170],"comprise":[19],"unstructured":[20],"text":[21],"languages,":[24,151,177],"causing":[25],"scholars":[26],"studying":[27],"civilizations":[29],"to":[30,43,70,98,199],"be":[31],"siloed,":[32],"relying":[34],"on":[35,62,96],"specific":[38],"languages.":[39],"Most":[40],"recent":[41],"efforts":[42],"extract":[44],"structured":[45],"information":[46],"from":[47,83,145,168,186],"historical":[48,169,179],"scripts":[49],"into":[50],"place":[51],"(toponym)":[52],"people":[54],"databases":[55],"(prospographies)":[56],"have":[57],"followed":[58],"this":[59,136],"pattern,":[60],"focusing":[61],"selected":[66],"sources.":[67],"path":[69],"creating":[71],"a":[72,99,126,140,158,162,182,200,209],"common":[73,100,127,201,210],"database":[74],"runs":[75],"through":[76],"aligning":[77],"names":[78],"or":[79],"toponyms":[80,144],"between":[81],"disparate":[84],"languages":[85,123],"utilizing":[86],"different":[87],"scripts.":[88],"Existing":[89],"multi-lingual":[90],"orthographic":[91],"(string-based)":[92],"comparison":[93,193,207],"relies":[95],"transliteration":[97,198],"script":[101],"(Latin/English).":[102],"Transliteration":[103],"creates":[105],"multiple":[106],"options":[107],"even":[109],"more":[110],"confusion.":[111],"However,":[112],"when":[113],"integrating":[114],"that":[116],"time,":[121],"the":[122,196,215,218,221],"share":[125],"phonetic":[128,206,211],"background.":[129],"This":[130],"commonality":[131],"may":[132],"prove":[133],"beneficial.":[134],"In":[135],"work,":[137],"we":[138],"present":[139],"benchmark":[141,159],"for":[142],"comparing":[143],"two":[146],"linguistically":[147],"culturally":[149],"related":[150],"namely":[152],"Hebrew":[153],"Arabic.":[155],"We":[156,188,213],"provide":[157],"comprised":[160],"set":[163],"dataset":[165,184],"pairs":[166],"created":[167],"written":[171],"Medieval":[173],"variants":[174],"later":[178],"Gazetteers":[180],"modern":[183],"curated":[185],"Wikidata.":[187],"empirically":[189],"evaluate":[190],"several":[191],"toponym":[192],"approaches":[194],"over":[195],"benchmark:":[197],"script,":[202],"direct":[203],"transliteration,":[204],"using":[208],"representation.":[212],"discuss":[214],"results":[216],"limitations":[219],"various":[222],"methods":[223],"outline":[225],"future":[226],"work.":[227]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-06-13T06:13:01.061226","created_date":"2025-10-10T00:00:00"}
