{"id":"https://openalex.org/W1989000808","doi":"https://doi.org/10.5220/0005170005050511","title":"Applying Information-theoretic and Edit Distance Approaches to Flexibly Measure Lexical Similarity","display_name":"Applying Information-theoretic and Edit Distance Approaches to Flexibly Measure Lexical Similarity","publication_year":2014,"publication_date":"2014-01-01","ids":{"openalex":"https://openalex.org/W1989000808","doi":"https://doi.org/10.5220/0005170005050511","mag":"1989000808"},"language":"en","primary_location":{"id":"doi:10.5220/0005170005050511","is_oa":true,"landing_page_url":"https://doi.org/10.5220/0005170005050511","pdf_url":null,"source":null,"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference on Knowledge Discovery and Information Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.5220/0005170005050511","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5107862829","display_name":"Thi Thuy Anh Nguyen","orcid":null},"institutions":[{"id":"https://openalex.org/I44260953","display_name":"Heinrich Heine University D\u00fcsseldorf","ror":"https://ror.org/024z2rq82","country_code":"DE","type":"education","lineage":["https://openalex.org/I44260953"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Thi Thuy Anh Nguyen","raw_affiliation_strings":["Heinrich-Heine-University D\u00fcsseldorf, Germany"],"affiliations":[{"raw_affiliation_string":"Heinrich-Heine-University D\u00fcsseldorf, Germany","institution_ids":["https://openalex.org/I44260953"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5037038868","display_name":"Stefan Conrad","orcid":"https://orcid.org/0000-0003-2788-3854"},"institutions":[{"id":"https://openalex.org/I44260953","display_name":"Heinrich Heine University D\u00fcsseldorf","ror":"https://ror.org/024z2rq82","country_code":"DE","type":"education","lineage":["https://openalex.org/I44260953"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Stefan Conrad","raw_affiliation_strings":["Heinrich-Heine-University D\u00fcsseldorf, Germany"],"affiliations":[{"raw_affiliation_string":"Heinrich-Heine-University D\u00fcsseldorf, Germany","institution_ids":["https://openalex.org/I44260953"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5107862829"],"corresponding_institution_ids":["https://openalex.org/I44260953"],"apc_list":null,"apc_paid":null,"fwci":0.5823,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.65164053,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"505","last_page":"511"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9872000217437744,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9872000217437744,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11063","display_name":"Rough Sets and Fuzzy Logic","score":0.9807000160217285,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9807000160217285,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7208045125007629},{"id":"https://openalex.org/keywords/edit-distance","display_name":"Edit distance","score":0.710706353187561},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.6937720775604248},{"id":"https://openalex.org/keywords/measure","display_name":"Measure (data warehouse)","score":0.5993603467941284},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5960146188735962},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5528563261032104},{"id":"https://openalex.org/keywords/similarity-measure","display_name":"Similarity measure","score":0.5473433136940002},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5455273389816284},{"id":"https://openalex.org/keywords/dice","display_name":"Dice","score":0.513170063495636},{"id":"https://openalex.org/keywords/precision-and-recall","display_name":"Precision and recall","score":0.5084520578384399},{"id":"https://openalex.org/keywords/distance-measures","display_name":"Distance measures","score":0.4106020927429199},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3920610845088959},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3808167576789856},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2024824619293213},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.06564190983772278},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.06535366177558899}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7208045125007629},{"id":"https://openalex.org/C44359876","wikidata":"https://www.wikidata.org/wiki/Q5338467","display_name":"Edit distance","level":2,"score":0.710706353187561},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.6937720775604248},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.5993603467941284},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5960146188735962},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5528563261032104},{"id":"https://openalex.org/C2776517306","wikidata":"https://www.wikidata.org/wiki/Q29017317","display_name":"Similarity measure","level":2,"score":0.5473433136940002},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5455273389816284},{"id":"https://openalex.org/C22029948","wikidata":"https://www.wikidata.org/wiki/Q45089","display_name":"Dice","level":2,"score":0.513170063495636},{"id":"https://openalex.org/C81669768","wikidata":"https://www.wikidata.org/wiki/Q2359161","display_name":"Precision and recall","level":2,"score":0.5084520578384399},{"id":"https://openalex.org/C2639959","wikidata":"https://www.wikidata.org/wiki/Q1344778","display_name":"Distance measures","level":2,"score":0.4106020927429199},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3920610845088959},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3808167576789856},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2024824619293213},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.06564190983772278},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.06535366177558899},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.5220/0005170005050511","is_oa":true,"landing_page_url":"https://doi.org/10.5220/0005170005050511","pdf_url":null,"source":null,"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference on Knowledge Discovery and Information Retrieval","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.5220/0005170005050511","is_oa":true,"landing_page_url":"https://doi.org/10.5220/0005170005050511","pdf_url":null,"source":null,"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference on Knowledge Discovery and Information Retrieval","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.5799999833106995}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W3001325086","https://openalex.org/W2797255169","https://openalex.org/W4299602258","https://openalex.org/W1550039237","https://openalex.org/W2180288160","https://openalex.org/W1579746959","https://openalex.org/W4225740479","https://openalex.org/W2069148796","https://openalex.org/W2102177745","https://openalex.org/W2142462752"],"abstract_inverted_index":{"Measurement":[0],"of":[1,64],"similarity":[2,33,80],"plays":[3],"an":[4],"important":[5],"role":[6],"in":[7,54,73],"data":[8],"mining":[9],"and":[10,28,84,121],"information":[11],"retrieval.":[12],"Several":[13],"techniques\r\n\r\nfor":[14],"calculating":[15],"the":[16,62,67,91,111],"similarities":[17],"between":[18,45],"objects":[19],"have":[20],"been":[21],"proposed":[22,112],"so":[23],"far,":[24],"for":[25],"example,":[26],"lexical-based,\r\n\r\nstructure-based":[27],"instance-based":[29],"measures.":[30],"Existing":[31],"lexical":[32,79],"measures":[34,49],"usually":[35],"base":[36],"on":[37],"either":[38],"ngrams\r\n\r\nor":[39],"Dice\u00e2\u0080\u0099s":[40],"approaches":[41],"to":[42,87,109,127],"obtain":[43],"correspondences":[44,89],"strings.":[46,74],"Although":[47],"these":[48],"are":[50,52,58,66,71,107],"efficient,\r\n\r\nthey":[51],"inadequate":[53],"situations":[55],"where":[56],"strings":[57],"quite":[59],"similar":[60],"or":[61],"sets":[63],"characters":[65],"same":[68],"but":[69],"their\r\n\r\npositions":[70],"different":[72],"In":[75],"this":[76],"paper,":[77],"a":[78],"approach":[81,118],"combining":[82],"information-theoretic\r\n\r\nmodel":[83],"edit":[85],"distance":[86],"determine":[88],"among":[90],"concept":[92],"labels":[93],"is":[94,119],"developed.":[95],"Precision,":[96],"Recall\r\n\r\nand":[97],"F-measure":[98],"as":[99,101],"well":[100],"partial":[102],"OAEI":[103],"benchmark":[104],"2008":[105],"tests":[106],"used":[108],"evaluate":[110],"method.\r\n\r\nThe":[113],"results":[114],"show":[115],"that":[116],"our":[117],"flexible":[120],"has":[122],"some":[123],"prominent":[124],"features":[125],"compared":[126],"other":[128],"lexical-based\r\n\r\nmethods.":[129]},"counts_by_year":[{"year":2015,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
