{"id":"https://openalex.org/W2050653561","doi":"https://doi.org/10.1109/tkde.2014.2309131","title":"Efficiently Supporting Edit Distance Based String Similarity Search Using B&lt;formula formulatype=\"inline\"&gt;&lt;tex Notation=\"TeX\"&gt; $^+$&lt;/tex&gt;&lt;mathgraphic graphicformat=\"GIF\" fileref=\"lu-ieq1-2309131.gif\"/&gt;&lt;/formula&gt;-Trees","display_name":"Efficiently Supporting Edit Distance Based String Similarity Search Using B&lt;formula formulatype=\"inline\"&gt;&lt;tex Notation=\"TeX\"&gt; $^+$&lt;/tex&gt;&lt;mathgraphic graphicformat=\"GIF\" fileref=\"lu-ieq1-2309131.gif\"/&gt;&lt;/formula&gt;-Trees","publication_year":2014,"publication_date":"2014-02-28","ids":{"openalex":"https://openalex.org/W2050653561","doi":"https://doi.org/10.1109/tkde.2014.2309131","mag":"2050653561"},"language":"en","primary_location":{"id":"doi:10.1109/tkde.2014.2309131","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tkde.2014.2309131","pdf_url":null,"source":{"id":"https://openalex.org/S30698027","display_name":"IEEE Transactions on Knowledge and Data Engineering","issn_l":"1041-4347","issn":["1041-4347","1558-2191","2326-3865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Knowledge and Data Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5005529387","display_name":"Wei L\u00fc","orcid":"https://orcid.org/0000-0001-6769-2695"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":true,"raw_author_name":"Wei Lu","raw_affiliation_strings":["National University of Singapore, Singapore, SG","School of Computing, National University of Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"National University of Singapore, Singapore, SG","institution_ids":["https://openalex.org/I165932596"]},{"raw_affiliation_string":"School of Computing, National University of Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008721449","display_name":"Xiaoyong Du","orcid":"https://orcid.org/0000-0002-5757-9135"},"institutions":[{"id":"https://openalex.org/I1327237609","display_name":"Ministry of Education of the People's Republic of China","ror":"https://ror.org/01mv9t934","country_code":"CN","type":"funder","lineage":["https://openalex.org/I1327237609","https://openalex.org/I4210127390"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaoyong Du","raw_affiliation_strings":["Key Laboratory of Data Engineering and Knowledge Engineering, Ministry of Education, China","Key Laboratory of Data Engineering and Knowledge Engineering, Ministry of Education, China#TAB#"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Data Engineering and Knowledge Engineering, Ministry of Education, China","institution_ids":["https://openalex.org/I1327237609"]},{"raw_affiliation_string":"Key Laboratory of Data Engineering and Knowledge Engineering, Ministry of Education, China#TAB#","institution_ids":["https://openalex.org/I1327237609"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058831095","display_name":"Marios Hadjieleftheriou","orcid":null},"institutions":[{"id":"https://openalex.org/I1283103587","display_name":"AT&T (United States)","ror":"https://ror.org/02bbd5539","country_code":"US","type":"company","lineage":["https://openalex.org/I1283103587"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Marios Hadjieleftheriou","raw_affiliation_strings":["AT&T Labs-Research, 180 Park Ave Bldg, 103 Florham Park","[AT&T Labs-Research, 180 Park Ave Bldg, 103 Florham Park]"],"affiliations":[{"raw_affiliation_string":"AT&T Labs-Research, 180 Park Ave Bldg, 103 Florham Park","institution_ids":["https://openalex.org/I1283103587"]},{"raw_affiliation_string":"[AT&T Labs-Research, 180 Park Ave Bldg, 103 Florham Park]","institution_ids":["https://openalex.org/I1283103587"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5024892041","display_name":"Beng Chin Ooi","orcid":"https://orcid.org/0000-0003-4446-1100"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Beng Chin Ooi","raw_affiliation_strings":["National University of Singapore, Singapore, SG","School of Computing, National University of Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"National University of Singapore, Singapore, SG","institution_ids":["https://openalex.org/I165932596"]},{"raw_affiliation_string":"School of Computing, National University of Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5005529387"],"corresponding_institution_ids":["https://openalex.org/I165932596"],"apc_list":null,"apc_paid":null,"fwci":6.0873,"has_fulltext":false,"cited_by_count":31,"citation_normalized_percentile":{"value":0.96205437,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":"26","issue":"12","first_page":"2983","last_page":"2996"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9937000274658203,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/edit-distance","display_name":"Edit distance","score":0.9238778352737427},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.688708484172821},{"id":"https://openalex.org/keywords/string-metric","display_name":"String metric","score":0.6818335056304932},{"id":"https://openalex.org/keywords/string","display_name":"String (physics)","score":0.6782736778259277},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.6304696202278137},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.5073837637901306},{"id":"https://openalex.org/keywords/tree","display_name":"Tree (set theory)","score":0.446354478597641},{"id":"https://openalex.org/keywords/combinatorics","display_name":"Combinatorics","score":0.31964027881622314},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.31446629762649536},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.24667584896087646},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.239559143781662},{"id":"https://openalex.org/keywords/string-searching-algorithm","display_name":"String searching algorithm","score":0.2390720248222351},{"id":"https://openalex.org/keywords/pattern-matching","display_name":"Pattern matching","score":0.08250942826271057}],"concepts":[{"id":"https://openalex.org/C44359876","wikidata":"https://www.wikidata.org/wiki/Q5338467","display_name":"Edit distance","level":2,"score":0.9238778352737427},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.688708484172821},{"id":"https://openalex.org/C22820288","wikidata":"https://www.wikidata.org/wiki/Q9050568","display_name":"String metric","level":4,"score":0.6818335056304932},{"id":"https://openalex.org/C157486923","wikidata":"https://www.wikidata.org/wiki/Q1376436","display_name":"String (physics)","level":2,"score":0.6782736778259277},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.6304696202278137},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.5073837637901306},{"id":"https://openalex.org/C113174947","wikidata":"https://www.wikidata.org/wiki/Q2859736","display_name":"Tree (set theory)","level":2,"score":0.446354478597641},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.31964027881622314},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.31446629762649536},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.24667584896087646},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.239559143781662},{"id":"https://openalex.org/C7757238","wikidata":"https://www.wikidata.org/wiki/Q374040","display_name":"String searching algorithm","level":3,"score":0.2390720248222351},{"id":"https://openalex.org/C68859911","wikidata":"https://www.wikidata.org/wiki/Q1503724","display_name":"Pattern matching","level":2,"score":0.08250942826271057},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C37914503","wikidata":"https://www.wikidata.org/wiki/Q156495","display_name":"Mathematical physics","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tkde.2014.2309131","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tkde.2014.2309131","pdf_url":null,"source":{"id":"https://openalex.org/S30698027","display_name":"IEEE Transactions on Knowledge and Data Engineering","issn_l":"1041-4347","issn":["1041-4347","1558-2191","2326-3865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Knowledge and Data Engineering","raw_type":"journal-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.455.6528","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.455.6528","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.comp.nus.edu.sg/~ooibc/stringtkde14.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W125979907","https://openalex.org/W1566022212","https://openalex.org/W1600463518","https://openalex.org/W1973001156","https://openalex.org/W1990565597","https://openalex.org/W1994655805","https://openalex.org/W2012010763","https://openalex.org/W2024605621","https://openalex.org/W2036413831","https://openalex.org/W2049003051","https://openalex.org/W2073329022","https://openalex.org/W2096598900","https://openalex.org/W2100548092","https://openalex.org/W2107293766","https://openalex.org/W2112912553","https://openalex.org/W2113875810","https://openalex.org/W2119057313","https://openalex.org/W2121269638","https://openalex.org/W2121516976","https://openalex.org/W2122465391","https://openalex.org/W2127675794","https://openalex.org/W2129750215","https://openalex.org/W2130825214","https://openalex.org/W2136716899","https://openalex.org/W2143124645","https://openalex.org/W2148148676","https://openalex.org/W2154931799","https://openalex.org/W2158779275","https://openalex.org/W2161936973","https://openalex.org/W2162592052","https://openalex.org/W2167847032","https://openalex.org/W6605088862","https://openalex.org/W6633912359","https://openalex.org/W6674576723","https://openalex.org/W6676669657","https://openalex.org/W6677631532","https://openalex.org/W6680962907","https://openalex.org/W6682887809","https://openalex.org/W6683401941"],"related_works":["https://openalex.org/W2950268498","https://openalex.org/W2102443632","https://openalex.org/W1505906253","https://openalex.org/W2007540612","https://openalex.org/W2461708070","https://openalex.org/W1982055477","https://openalex.org/W1815899388","https://openalex.org/W2162102353","https://openalex.org/W2061135126","https://openalex.org/W2463404432"],"abstract_inverted_index":{"Edit":[0],"distance":[1,17,94,128],"is":[2,22,129,215],"widely":[3],"used":[4],"for":[5,42,225],"measuring":[6],"the":[7,49,122,135,169,194,208,212,227,240,251],"similarity":[8,20,46,97,115],"between":[9],"two":[10,182],"strings.":[11,146,178],"As":[12],"a":[13,27,33,130,142,155,222,258],"primitive":[14],"operation,":[15],"edit":[16,38,93,127],"based":[18,89,95,167,192,278],"string":[19,36,45,96,114,136],"search":[21,116],"to":[23,32,91,141,174,184,238,247],"find":[24],"strings":[25,150,173,229,244],"in":[26,67,74,121,125,151,292],"collection":[28,137],"that":[29,60,126,207,245,266],"are":[30,65],"similar":[31],"given":[34],"query":[35,252],"using":[37,53,117,154],"distance.":[39],"Existing":[40],"approaches":[41,58,90,102,183,279],"answering":[43],"such":[44],"queries":[47,291],"follow":[48],"filter-and-verify":[50],"framework":[51],"by":[52],"various":[54],"indexes.":[55],"Typically,":[56],"most":[57,293],"assume":[59],"indexes":[61],"and":[62,99,188,219,231,289],"data":[63,213,262],"sets":[64],"maintained":[66],"main":[68],"memory.":[69],"To":[70],"overcome":[71],"this":[72,75],"limitation,":[73],"paper,":[76],"we":[77,112,133,148,180,264],"propose":[78,181,221],"B":[79,157,195,268],"<formula":[80,158,196,269],"formulatype=\"inline\"":[81,159,197,270],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[82,160,198,271],"xmlns:xlink=\"http://www.w3.org/1999/xlink\"><tex":[83,161,199,272],"Notation=\"TeX\">":[84],"$^+$</tex><mathgraphic":[85],"fileref=\"lu-ieq2-2309131.gif\"":[86],"graphicformat=\"GIF\"/></formula>":[87,165,203,276],"-tree":[88,166,277],"answer":[92,113,186],"queries,":[98,190],"hence,":[100],"our":[101,267],"can":[103],"be":[104,248],"easily":[105],"integrated":[106],"into":[107,138],"existing":[108],"RDBMSs.":[109],"In":[110],"general,":[111],"pruning":[118],"techniques":[119,285],"employed":[120],"metric":[123],"space":[124],"metric.":[131],"First,":[132],"split":[134],"partitions":[139,153],"according":[140],"set":[143,214],"of":[144,171,211,243,260],"reference":[145,177,228],"Then,":[147],"index":[149],"all":[152],"single":[156],"Notation=\"TeX\">$^+$</tex>":[162,200,273],"<mathgraphic":[163,201,274],"fileref=\"lu-ieq3-2309131.gif\"":[164],"on":[168,193,286],"distances":[170],"these":[172],"their":[175],"corresponding":[176],"Finally,":[179],"efficiently":[185],"range":[187,288],"KNN":[189,290],"respectively,":[191],"fileref=\"lu-ieq4-2309131.gif\"":[202],"-tree.":[204],"We":[205],"prove":[206],"optimal":[209,234],"partitioning":[210],"an":[216,233],"NP-hard":[217],"problem,":[218],"therefore":[220],"heuristic":[223],"approach":[224],"selecting":[226],"greedily":[230],"present":[232],"partition":[235],"assignment":[236],"strategy":[237],"minimize":[239],"expected":[241],"number":[242],"need":[246],"verified":[249],"during":[250],"evaluation.":[253],"Through":[254],"extensive":[255],"experiments":[256],"over":[257,283],"variety":[259],"real":[261],"sets,":[263],"demonstrate":[265],"fileref=\"lu-ieq5-2309131.gif\"":[275],"provide":[280],"superior":[281],"performance":[282],"state-of-the-art":[284],"both":[287],"cases.":[294]},"counts_by_year":[{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":5},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":3},{"year":2017,"cited_by_count":6},{"year":2016,"cited_by_count":5},{"year":2015,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
