{"id":"https://openalex.org/W2751985240","doi":"https://doi.org/10.1080/17538947.2017.1371253","title":"Learning to combine multiple string similarity metrics for effective toponym matching","display_name":"Learning to combine multiple string similarity metrics for effective toponym matching","publication_year":2017,"publication_date":"2017-09-06","ids":{"openalex":"https://openalex.org/W2751985240","doi":"https://doi.org/10.1080/17538947.2017.1371253","mag":"2751985240"},"language":"en","primary_location":{"id":"doi:10.1080/17538947.2017.1371253","is_oa":false,"landing_page_url":"https://doi.org/10.1080/17538947.2017.1371253","pdf_url":null,"source":{"id":"https://openalex.org/S199162493","display_name":"International Journal of Digital Earth","issn_l":"1753-8947","issn":["1753-8947","1753-8955"],"is_oa":false,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320547","host_organization_name":"Taylor & Francis","host_organization_lineage":["https://openalex.org/P4310320547"],"host_organization_lineage_names":["Taylor & Francis"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Digital Earth","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://eprints.lancs.ac.uk/id/eprint/89481/1/Manusc_Combining_Multiple_String_Similarity_Metrics_for_Effective_Toponym_Matching.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5018982878","display_name":"Rui Santos","orcid":"https://orcid.org/0000-0001-5981-9924"},"institutions":[{"id":"https://openalex.org/I121345201","display_name":"Instituto de Engenharia de Sistemas e Computadores Investiga\u00e7\u00e3o e Desenvolvimento","ror":"https://ror.org/04mqy3p58","country_code":"PT","type":"nonprofit","lineage":["https://openalex.org/I121345201","https://openalex.org/I4210125590"]},{"id":"https://openalex.org/I141596103","display_name":"University of Lisbon","ror":"https://ror.org/01c27hj86","country_code":"PT","type":"education","lineage":["https://openalex.org/I141596103"]}],"countries":["PT"],"is_corresponding":true,"raw_author_name":"Rui Santos","raw_affiliation_strings":["Instituto Superior T\u00e9cnico and INESC-ID, University of Lisbon, Lisbon, Portugal"],"raw_orcid":"https://orcid.org/0000-0001-5981-9924","affiliations":[{"raw_affiliation_string":"Instituto Superior T\u00e9cnico and INESC-ID, University of Lisbon, Lisbon, Portugal","institution_ids":["https://openalex.org/I121345201","https://openalex.org/I141596103"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007907503","display_name":"Patricia Murrieta\u2010Flores","orcid":"https://orcid.org/0000-0001-9904-0288"},"institutions":[{"id":"https://openalex.org/I137265193","display_name":"University of Chester","ror":"https://ror.org/01drpwb22","country_code":"GB","type":"education","lineage":["https://openalex.org/I137265193"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Patricia Murrieta-Flores","raw_affiliation_strings":["Digital Humanities Research Center, University of Chester, Chester, UK"],"raw_orcid":"https://orcid.org/0000-0001-9904-0288","affiliations":[{"raw_affiliation_string":"Digital Humanities Research Center, University of Chester, Chester, UK","institution_ids":["https://openalex.org/I137265193"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5055101594","display_name":"Bruno Martins","orcid":"https://orcid.org/0000-0002-3856-2936"},"institutions":[{"id":"https://openalex.org/I121345201","display_name":"Instituto de Engenharia de Sistemas e Computadores Investiga\u00e7\u00e3o e Desenvolvimento","ror":"https://ror.org/04mqy3p58","country_code":"PT","type":"nonprofit","lineage":["https://openalex.org/I121345201","https://openalex.org/I4210125590"]},{"id":"https://openalex.org/I141596103","display_name":"University of Lisbon","ror":"https://ror.org/01c27hj86","country_code":"PT","type":"education","lineage":["https://openalex.org/I141596103"]}],"countries":["PT"],"is_corresponding":false,"raw_author_name":"Bruno Martins","raw_affiliation_strings":["Instituto Superior T\u00e9cnico and INESC-ID, University of Lisbon, Lisbon, Portugal"],"raw_orcid":"https://orcid.org/0000-0002-3856-2936","affiliations":[{"raw_affiliation_string":"Instituto Superior T\u00e9cnico and INESC-ID, University of Lisbon, Lisbon, Portugal","institution_ids":["https://openalex.org/I121345201","https://openalex.org/I141596103"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5018982878"],"corresponding_institution_ids":["https://openalex.org/I121345201","https://openalex.org/I141596103"],"apc_list":{"value":2390,"currency":"USD","value_usd":2390},"apc_paid":null,"fwci":20.399,"has_fulltext":true,"cited_by_count":51,"citation_normalized_percentile":{"value":0.98963754,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":"11","issue":"9","first_page":"913","last_page":"938"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10757","display_name":"Geographic Information Systems Studies","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/3305","display_name":"Geography, Planning and Development"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10757","display_name":"Geographic Information Systems Studies","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/3305","display_name":"Geography, Planning and Development"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9955000281333923,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9925000071525574,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.7463656067848206},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7038770914077759},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.6258586645126343},{"id":"https://openalex.org/keywords/referent","display_name":"Referent","score":0.6219197511672974},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6165720224380493},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5504266023635864},{"id":"https://openalex.org/keywords/string-metric","display_name":"String metric","score":0.4981698989868164},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.4863296151161194},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.443888783454895},{"id":"https://openalex.org/keywords/semantic-similarity","display_name":"Semantic similarity","score":0.43677499890327454},{"id":"https://openalex.org/keywords/string","display_name":"String (physics)","score":0.43039610981941223},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3841046094894409},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3614594340324402},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3396100401878357},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3295067548751831},{"id":"https://openalex.org/keywords/string-searching-algorithm","display_name":"String searching algorithm","score":0.2811322808265686},{"id":"https://openalex.org/keywords/pattern-matching","display_name":"Pattern matching","score":0.20836448669433594},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.14887428283691406},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.10838115215301514},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.07329040765762329}],"concepts":[{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.7463656067848206},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7038770914077759},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.6258586645126343},{"id":"https://openalex.org/C2777096784","wikidata":"https://www.wikidata.org/wiki/Q3826351","display_name":"Referent","level":2,"score":0.6219197511672974},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6165720224380493},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5504266023635864},{"id":"https://openalex.org/C22820288","wikidata":"https://www.wikidata.org/wiki/Q9050568","display_name":"String metric","level":4,"score":0.4981698989868164},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.4863296151161194},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.443888783454895},{"id":"https://openalex.org/C130318100","wikidata":"https://www.wikidata.org/wiki/Q2268914","display_name":"Semantic similarity","level":2,"score":0.43677499890327454},{"id":"https://openalex.org/C157486923","wikidata":"https://www.wikidata.org/wiki/Q1376436","display_name":"String (physics)","level":2,"score":0.43039610981941223},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3841046094894409},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3614594340324402},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3396100401878357},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3295067548751831},{"id":"https://openalex.org/C7757238","wikidata":"https://www.wikidata.org/wiki/Q374040","display_name":"String searching algorithm","level":3,"score":0.2811322808265686},{"id":"https://openalex.org/C68859911","wikidata":"https://www.wikidata.org/wiki/Q1503724","display_name":"Pattern matching","level":2,"score":0.20836448669433594},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.14887428283691406},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.10838115215301514},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.07329040765762329},{"id":"https://openalex.org/C37914503","wikidata":"https://www.wikidata.org/wiki/Q156495","display_name":"Mathematical physics","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1080/17538947.2017.1371253","is_oa":false,"landing_page_url":"https://doi.org/10.1080/17538947.2017.1371253","pdf_url":null,"source":{"id":"https://openalex.org/S199162493","display_name":"International Journal of Digital Earth","issn_l":"1753-8947","issn":["1753-8947","1753-8955"],"is_oa":false,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320547","host_organization_name":"Taylor & Francis","host_organization_lineage":["https://openalex.org/P4310320547"],"host_organization_lineage_names":["Taylor & Francis"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Digital Earth","raw_type":"journal-article"},{"id":"pmh:oai:eprints.lancs.ac.uk:89481","is_oa":true,"landing_page_url":null,"pdf_url":"https://eprints.lancs.ac.uk/id/eprint/89481/1/Manusc_Combining_Multiple_String_Similarity_Metrics_for_Effective_Toponym_Matching.pdf","source":{"id":"https://openalex.org/S4306401916","display_name":"Lancaster EPrints (Lancaster University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67415387","host_organization_name":"Lancaster University","host_organization_lineage":["https://openalex.org/I67415387"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"PeerReviewed"},{"id":"pmh:oai:chesterrep.openrepository.com:10034/621275","is_oa":false,"landing_page_url":"http://hdl.handle.net/10034/621275","pdf_url":null,"source":{"id":"https://openalex.org/S4377196415","display_name":"ChesterRep (University of Chester)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I137265193","host_organization_name":"University of Chester","host_organization_lineage":["https://openalex.org/I137265193"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"eissn: 1753-8955","raw_type":"Article"},{"id":"pmh:oai:doaj.org/article:28b624ae2a8b46d5a018b2551c453690","is_oa":false,"landing_page_url":"https://doaj.org/article/28b624ae2a8b46d5a018b2551c453690","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"International Journal of Digital Earth, Vol 11, Iss 9, Pp 913-938 (2018)","raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:eprints.lancs.ac.uk:89481","is_oa":true,"landing_page_url":null,"pdf_url":"https://eprints.lancs.ac.uk/id/eprint/89481/1/Manusc_Combining_Multiple_String_Similarity_Metrics_for_Effective_Toponym_Matching.pdf","source":{"id":"https://openalex.org/S4306401916","display_name":"Lancaster EPrints (Lancaster University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67415387","host_organization_name":"Lancaster University","host_organization_lineage":["https://openalex.org/I67415387"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"PeerReviewed"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3332191635","display_name":null,"funder_award_id":"CMUP-ERI/TIC/0046/2014","funder_id":"https://openalex.org/F4320334779","funder_display_name":"Funda\u00e7\u00e3o para a Ci\u00eancia e a Tecnologia"},{"id":"https://openalex.org/G4682063952","display_name":null,"funder_award_id":"PTDC/EEI-SCR/1743/2014","funder_id":"https://openalex.org/F4320334779","funder_display_name":"Funda\u00e7\u00e3o para a Ci\u00eancia e a Tecnologia"},{"id":"https://openalex.org/G683289958","display_name":null,"funder_award_id":"ES/R003890/1","funder_id":"https://openalex.org/F4320334630","funder_display_name":"Economic and Social Research Council"}],"funders":[{"id":"https://openalex.org/F4320334630","display_name":"Economic and Social Research Council","ror":"https://ror.org/03n0ht308"},{"id":"https://openalex.org/F4320334779","display_name":"Funda\u00e7\u00e3o para a Ci\u00eancia e a Tecnologia","ror":"https://ror.org/00snfqn58"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2751985240.pdf","grobid_xml":"https://content.openalex.org/works/W2751985240.grobid-xml"},"referenced_works_count":60,"referenced_works":["https://openalex.org/W48945287","https://openalex.org/W1428814278","https://openalex.org/W1483426847","https://openalex.org/W1492815834","https://openalex.org/W1503398984","https://openalex.org/W1520449809","https://openalex.org/W1538296207","https://openalex.org/W1595552350","https://openalex.org/W1601795611","https://openalex.org/W1647671624","https://openalex.org/W1678356000","https://openalex.org/W1766062831","https://openalex.org/W1840435438","https://openalex.org/W1906503946","https://openalex.org/W1967688128","https://openalex.org/W1971375508","https://openalex.org/W1987552279","https://openalex.org/W1987869189","https://openalex.org/W2001496424","https://openalex.org/W2003094793","https://openalex.org/W2025785578","https://openalex.org/W2033673704","https://openalex.org/W2034190452","https://openalex.org/W2056132907","https://openalex.org/W2057900969","https://openalex.org/W2066792529","https://openalex.org/W2074231493","https://openalex.org/W2076677427","https://openalex.org/W2081322929","https://openalex.org/W2088091054","https://openalex.org/W2089178608","https://openalex.org/W2101006331","https://openalex.org/W2102443632","https://openalex.org/W2110824414","https://openalex.org/W2111938898","https://openalex.org/W2117085409","https://openalex.org/W2118463056","https://openalex.org/W2123402141","https://openalex.org/W2124055239","https://openalex.org/W2134150403","https://openalex.org/W2158130508","https://openalex.org/W2160580257","https://openalex.org/W2170738476","https://openalex.org/W2211192759","https://openalex.org/W2284922225","https://openalex.org/W2293478687","https://openalex.org/W2294500633","https://openalex.org/W2413794162","https://openalex.org/W2432216100","https://openalex.org/W2485143390","https://openalex.org/W2545162188","https://openalex.org/W2556915629","https://openalex.org/W2556917524","https://openalex.org/W2593929528","https://openalex.org/W2736621900","https://openalex.org/W2911964244","https://openalex.org/W3152294918","https://openalex.org/W4214671568","https://openalex.org/W4246540081","https://openalex.org/W4291172695"],"related_works":["https://openalex.org/W2964015157","https://openalex.org/W4301653298","https://openalex.org/W2119262534","https://openalex.org/W152351265","https://openalex.org/W1970026646","https://openalex.org/W3042295250","https://openalex.org/W2994651536","https://openalex.org/W4232755842","https://openalex.org/W2765918440","https://openalex.org/W4237027867"],"abstract_inverted_index":{"Several":[0],"tasks":[1],"related":[2],"to":[3,8],"geographical":[4,10],"information":[5,11],"retrieval":[6],"and":[7,102],"the":[9,18,34,41,49,59,72,77,91,95,106,138],"sciences":[12],"involve":[13],"toponym":[14,50],"matching,":[15],"that":[16,24,90,103],"is,":[17],"problem":[19],"of":[20,36,43,61,75,80,126],"matching":[21,51],"place":[22],"names":[23],"share":[25],"a":[26,37,85],"common":[27],"referent.":[28],"In":[29],"this":[30,134],"article,":[31],"we":[32],"present":[33],"results":[35,132],"wide-ranging":[38],"evaluation":[39],"on":[40,56,118,133],"performance":[42,92],"different":[44],"string":[45],"similarity":[46,68,81,97,107,140],"metrics":[47,98],"over":[48],"task.":[52],"We":[53],"also":[54],"report":[55],"experiments":[57],"involving":[58],"usage":[60],"supervised":[62,119],"machine":[63,120],"learning":[64],"for":[65,94,111],"combining":[66],"multiple":[67],"metrics,":[69],"which":[70],"has":[71],"natural":[73],"advantage":[74],"avoiding":[76],"manual":[78],"tuning":[79,105],"thresholds.":[82],"Experiments":[83],"with":[84],"very":[86],"large":[87],"dataset":[88],"show":[89],"differences":[93],"individual":[96,139],"are":[99],"relatively":[100],"small,":[101],"carefully":[104],"threshold":[108],"is":[109],"important":[110],"achieving":[112],"good":[113,131],"results.":[114],"The":[115],"methods":[116],"based":[117],"learning,":[121],"particularly":[122],"when":[123],"considering":[124],"ensembles":[125],"decision":[127],"trees,":[128],"can":[129],"achieve":[130],"task,":[135],"significantly":[136],"outperforming":[137],"metrics.":[141]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":6},{"year":2021,"cited_by_count":10},{"year":2020,"cited_by_count":6},{"year":2019,"cited_by_count":6},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":4}],"updated_date":"2026-05-06T08:25:59.206177","created_date":"2025-10-10T00:00:00"}
