{"id":"https://openalex.org/W2162102353","doi":"https://doi.org/10.1109/icde.2003.1260787","title":"Distance based indexing for string proximity search","display_name":"Distance based indexing for string proximity search","publication_year":2004,"publication_date":"2004-05-13","ids":{"openalex":"https://openalex.org/W2162102353","doi":"https://doi.org/10.1109/icde.2003.1260787","mag":"2162102353"},"language":"en","primary_location":{"id":"doi:10.1109/icde.2003.1260787","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icde.2003.1260787","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings 19th International Conference on Data Engineering (Cat. No.03CH37405)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102972238","display_name":"S. Cenk \u015eahinalp","orcid":"https://orcid.org/0000-0002-2170-2808"},"institutions":[{"id":"https://openalex.org/I58956616","display_name":"Case Western Reserve University","ror":"https://ror.org/051fd9666","country_code":"US","type":"education","lineage":["https://openalex.org/I58956616"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"S.C. Sahinalp","raw_affiliation_strings":["Center for Computational Genomics, Case Western Reserve University, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Center for Computational Genomics, Case Western Reserve University, USA","institution_ids":["https://openalex.org/I58956616"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026802459","display_name":"Murat Ta\u015fan","orcid":"https://orcid.org/0000-0003-1490-7626"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"M. Tasan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112733653","display_name":"Joseph P. Macker","orcid":null},"institutions":[{"id":"https://openalex.org/I58956616","display_name":"Case Western Reserve University","ror":"https://ror.org/051fd9666","country_code":"US","type":"education","lineage":["https://openalex.org/I58956616"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"J. Macker","raw_affiliation_strings":["Department of EECS, Case Western Reserve University, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of EECS, Case Western Reserve University, USA","institution_ids":["https://openalex.org/I58956616"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5013424429","display_name":"Z. Meral \u00d6zsoyo\u011flu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Z.M. Ozsoyoglu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":12.0278,"has_fulltext":false,"cited_by_count":56,"citation_normalized_percentile":{"value":0.98520925,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"125","last_page":"136"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9937999844551086,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/edit-distance","display_name":"Edit distance","score":0.9273948073387146},{"id":"https://openalex.org/keywords/string-metric","display_name":"String metric","score":0.7391363978385925},{"id":"https://openalex.org/keywords/string","display_name":"String (physics)","score":0.7034022212028503},{"id":"https://openalex.org/keywords/nearest-neighbor-search","display_name":"Nearest neighbor search","score":0.6330791115760803},{"id":"https://openalex.org/keywords/search-engine-indexing","display_name":"Search engine indexing","score":0.6315934062004089},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6026237607002258},{"id":"https://openalex.org/keywords/levenshtein-distance","display_name":"Levenshtein distance","score":0.5067024827003479},{"id":"https://openalex.org/keywords/approximate-string-matching","display_name":"Approximate string matching","score":0.5016162395477295},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.4772919714450836},{"id":"https://openalex.org/keywords/character","display_name":"Character (mathematics)","score":0.4450916051864624},{"id":"https://openalex.org/keywords/hamming-distance","display_name":"Hamming distance","score":0.4167138934135437},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.3398667871952057},{"id":"https://openalex.org/keywords/string-searching-algorithm","display_name":"String searching algorithm","score":0.3215569257736206},{"id":"https://openalex.org/keywords/data-structure","display_name":"Data structure","score":0.3010525405406952},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.29998743534088135},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2856268286705017},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.2788284718990326},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2661970555782318}],"concepts":[{"id":"https://openalex.org/C44359876","wikidata":"https://www.wikidata.org/wiki/Q5338467","display_name":"Edit distance","level":2,"score":0.9273948073387146},{"id":"https://openalex.org/C22820288","wikidata":"https://www.wikidata.org/wiki/Q9050568","display_name":"String metric","level":4,"score":0.7391363978385925},{"id":"https://openalex.org/C157486923","wikidata":"https://www.wikidata.org/wiki/Q1376436","display_name":"String (physics)","level":2,"score":0.7034022212028503},{"id":"https://openalex.org/C116738811","wikidata":"https://www.wikidata.org/wiki/Q608751","display_name":"Nearest neighbor search","level":2,"score":0.6330791115760803},{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.6315934062004089},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6026237607002258},{"id":"https://openalex.org/C2777515626","wikidata":"https://www.wikidata.org/wiki/Q496939","display_name":"Levenshtein distance","level":2,"score":0.5067024827003479},{"id":"https://openalex.org/C32610155","wikidata":"https://www.wikidata.org/wiki/Q1798621","display_name":"Approximate string matching","level":3,"score":0.5016162395477295},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.4772919714450836},{"id":"https://openalex.org/C2780861071","wikidata":"https://www.wikidata.org/wiki/Q1062934","display_name":"Character (mathematics)","level":2,"score":0.4450916051864624},{"id":"https://openalex.org/C193319292","wikidata":"https://www.wikidata.org/wiki/Q272172","display_name":"Hamming distance","level":2,"score":0.4167138934135437},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3398667871952057},{"id":"https://openalex.org/C7757238","wikidata":"https://www.wikidata.org/wiki/Q374040","display_name":"String searching algorithm","level":3,"score":0.3215569257736206},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.3010525405406952},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.29998743534088135},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2856268286705017},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2788284718990326},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2661970555782318},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C37914503","wikidata":"https://www.wikidata.org/wiki/Q156495","display_name":"Mathematical physics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C68859911","wikidata":"https://www.wikidata.org/wiki/Q1503724","display_name":"Pattern matching","level":2,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/icde.2003.1260787","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icde.2003.1260787","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings 19th International Conference on Data Engineering (Cat. No.03CH37405)","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.60.3090","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.60.3090","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://compbio.cs.sfu.ca/publications/icde_final.3.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.86.469","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.86.469","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.cse.uconn.edu/neds/slides/jan-17-03/ICDE03-final.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":77,"referenced_works":["https://openalex.org/W646049869","https://openalex.org/W938539187","https://openalex.org/W1486088638","https://openalex.org/W1499049447","https://openalex.org/W1502916507","https://openalex.org/W1506631190","https://openalex.org/W1507719695","https://openalex.org/W1509426348","https://openalex.org/W1511329459","https://openalex.org/W1527632333","https://openalex.org/W1540616176","https://openalex.org/W1541459201","https://openalex.org/W1547966682","https://openalex.org/W1554174647","https://openalex.org/W1566022212","https://openalex.org/W1582119223","https://openalex.org/W1587157435","https://openalex.org/W1647671624","https://openalex.org/W1971927246","https://openalex.org/W1978414048","https://openalex.org/W1994529543","https://openalex.org/W2001964584","https://openalex.org/W2004215917","https://openalex.org/W2008196645","https://openalex.org/W2017661493","https://openalex.org/W2028904582","https://openalex.org/W2046042060","https://openalex.org/W2049644877","https://openalex.org/W2050749090","https://openalex.org/W2055043387","https://openalex.org/W2062539466","https://openalex.org/W2072994259","https://openalex.org/W2074231493","https://openalex.org/W2078769454","https://openalex.org/W2087064593","https://openalex.org/W2088179629","https://openalex.org/W2097042476","https://openalex.org/W2097921974","https://openalex.org/W2103014446","https://openalex.org/W2103485833","https://openalex.org/W2105022522","https://openalex.org/W2118269922","https://openalex.org/W2124363644","https://openalex.org/W2125598538","https://openalex.org/W2127230663","https://openalex.org/W2128831915","https://openalex.org/W2144221002","https://openalex.org/W2145725688","https://openalex.org/W2147717514","https://openalex.org/W2151135734","https://openalex.org/W2151318268","https://openalex.org/W2155976638","https://openalex.org/W2157092487","https://openalex.org/W2161936973","https://openalex.org/W2168909179","https://openalex.org/W2238624099","https://openalex.org/W2610179052","https://openalex.org/W2997027240","https://openalex.org/W4235506144","https://openalex.org/W4236236547","https://openalex.org/W4238737563","https://openalex.org/W4285719527","https://openalex.org/W6629760146","https://openalex.org/W6629956336","https://openalex.org/W6630333510","https://openalex.org/W6630456963","https://openalex.org/W6632287593","https://openalex.org/W6632397832","https://openalex.org/W6632963374","https://openalex.org/W6633912359","https://openalex.org/W6636915900","https://openalex.org/W6657664104","https://openalex.org/W6674878074","https://openalex.org/W6675441415","https://openalex.org/W6675816764","https://openalex.org/W6678515977","https://openalex.org/W6683401941"],"related_works":["https://openalex.org/W2461708070","https://openalex.org/W2950268498","https://openalex.org/W1505906253","https://openalex.org/W4280559639","https://openalex.org/W4285090010","https://openalex.org/W2399644331","https://openalex.org/W2102443632","https://openalex.org/W2074064717","https://openalex.org/W2405436873","https://openalex.org/W2187092961"],"abstract_inverted_index":{"In":[0,163],"many":[1],"database":[2],"applications":[3],"involving":[4],"string":[5,60,81,135,207],"data,":[6,208],"it":[7],"is":[8,44,100,119],"common":[9],"to":[10,21,36,78,101,165,198,202],"have":[11],"near":[12],"neighbor":[13,27,105],"queries":[14,28],"(asking":[15,29],"for":[16,30,110,128,134,178],"strings":[17,31,43],"that":[18,32,108,120,139,146],"are":[19,33,63,125,160],"similar":[20,35],"a":[22,37,49,183],"query":[23,38],"string)":[24],"or":[25,73],"nearest":[26],"most":[34,58],"string).":[39],"The":[40,57,97],"similarity":[41],"between":[42,211],"defined":[45],"in":[46],"terms":[47],"of":[48,69,169,188],"distance":[50,61,90,136,148,154,159,170],"function":[51],"determined":[52],"by":[53],"the":[54,83,87,92,152,167],"application":[55],"domain.":[56],"popular":[59],"measures":[62],"based":[64,171,185],"on":[65,186,192,206,221],"(a":[66],"weighted)":[67],"count":[68],"(i)":[70],"character":[71,112,157],"edit":[72,76,89,115,158],"(ii)":[74],"block":[75,114],"operations":[77],"transform":[79],"one":[80],"into":[82],"other.":[84],"Examples":[85],"include":[86],"Levenshtein":[88],"and":[91,113,155,214,225],"recently":[93],"introduced":[94],"compression":[95,153],"distance.":[96],"main":[98],"goal":[99],"develop":[102,182],"efficient":[103],"near(est)":[104],"search":[106,212],"tools":[107],"work":[109],"both":[111],"distances.":[116,190],"Our":[117],"premise":[118],"distance-based":[121],"indexing":[122,172],"methods,":[123],"which":[124],"originally":[126],"designed":[127],"metric":[129],"distances":[130],"can":[131],"be":[132],"modified":[133],"measures,":[137,149],"provided":[138],"they":[140],"form":[141],"almost":[142,161],"metrics.":[143,162],"We":[144,216],"show":[145,196],"several":[147],"such":[150],"as":[151],"weighted":[156],"order":[164],"analyze":[166],"performance":[168,205],"methods":[173],"(in":[174],"particular":[175],"VP":[176,200],"trees)":[177],"strings,":[179],"we":[180,195],"then":[181],"model":[184,194],"distribution":[187],"pairwise":[189],"Based":[191],"this":[193],"how":[197],"modify":[199],"trees":[201],"improve":[203],"their":[204],"providing":[209],"tradeoffs":[210],"time":[213],"space.":[215],"test":[217],"our":[218],"theoretical":[219],"results":[220],"synthetic":[222],"data":[223],"sets":[224],"protein":[226],"strings.":[227]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2016,"cited_by_count":1},{"year":2015,"cited_by_count":1},{"year":2014,"cited_by_count":2},{"year":2013,"cited_by_count":1},{"year":2012,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
