{"id":"https://openalex.org/W2075663964","doi":"https://doi.org/10.1145/1183614.1183623","title":"An approximate multi-word matching algorithm for robust document retrieval","display_name":"An approximate multi-word matching algorithm for robust document retrieval","publication_year":2006,"publication_date":"2006-01-01","ids":{"openalex":"https://openalex.org/W2075663964","doi":"https://doi.org/10.1145/1183614.1183623","mag":"2075663964"},"language":"en","primary_location":{"id":"doi:10.1145/1183614.1183623","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1183614.1183623","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 15th ACM international conference on Information and knowledge management  - CIKM '06","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5087434029","display_name":"Atsuhiro Takasu","orcid":"https://orcid.org/0000-0002-9061-7949"},"institutions":[{"id":"https://openalex.org/I184597095","display_name":"National Institute of Informatics","ror":"https://ror.org/04ksd4g47","country_code":"JP","type":"facility","lineage":["https://openalex.org/I1319490839","https://openalex.org/I184597095","https://openalex.org/I4210158934"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Atsuhiro Takasu","raw_affiliation_strings":["National Institute of Informatics, Chiyoda-ku, Tokyo, Japan","National Institute of Informatics, Chiyoda-Ku Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"National Institute of Informatics, Chiyoda-ku, Tokyo, Japan","institution_ids":["https://openalex.org/I184597095"]},{"raw_affiliation_string":"National Institute of Informatics, Chiyoda-Ku Tokyo, Japan","institution_ids":["https://openalex.org/I184597095"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5087434029"],"corresponding_institution_ids":["https://openalex.org/I184597095"],"apc_list":null,"apc_paid":null,"fwci":1.807,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.86983784,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"34","last_page":"34"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9957000017166138,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/substring","display_name":"Substring","score":0.8761135935783386},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7562118768692017},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.72162926197052},{"id":"https://openalex.org/keywords/string","display_name":"String (physics)","score":0.6328726410865784},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.6305394172668457},{"id":"https://openalex.org/keywords/character","display_name":"Character (mathematics)","score":0.5900033712387085},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5668700933456421},{"id":"https://openalex.org/keywords/string-searching-algorithm","display_name":"String searching algorithm","score":0.5373700261116028},{"id":"https://openalex.org/keywords/dynamic-programming","display_name":"Dynamic programming","score":0.5189604163169861},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.48851311206817627},{"id":"https://openalex.org/keywords/edit-distance","display_name":"Edit distance","score":0.4704671800136566},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4659821391105652},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.4588914215564728},{"id":"https://openalex.org/keywords/tree","display_name":"Tree (set theory)","score":0.455073744058609},{"id":"https://openalex.org/keywords/pattern-matching","display_name":"Pattern matching","score":0.4281703531742096},{"id":"https://openalex.org/keywords/time-complexity","display_name":"Time complexity","score":0.4224262237548828},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3844013214111328},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1762717366218567},{"id":"https://openalex.org/keywords/combinatorics","display_name":"Combinatorics","score":0.09520089626312256},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.07661589980125427}],"concepts":[{"id":"https://openalex.org/C182407805","wikidata":"https://www.wikidata.org/wiki/Q2626534","display_name":"Substring","level":3,"score":0.8761135935783386},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7562118768692017},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.72162926197052},{"id":"https://openalex.org/C157486923","wikidata":"https://www.wikidata.org/wiki/Q1376436","display_name":"String (physics)","level":2,"score":0.6328726410865784},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.6305394172668457},{"id":"https://openalex.org/C2780861071","wikidata":"https://www.wikidata.org/wiki/Q1062934","display_name":"Character (mathematics)","level":2,"score":0.5900033712387085},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5668700933456421},{"id":"https://openalex.org/C7757238","wikidata":"https://www.wikidata.org/wiki/Q374040","display_name":"String searching algorithm","level":3,"score":0.5373700261116028},{"id":"https://openalex.org/C37404715","wikidata":"https://www.wikidata.org/wiki/Q380679","display_name":"Dynamic programming","level":2,"score":0.5189604163169861},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.48851311206817627},{"id":"https://openalex.org/C44359876","wikidata":"https://www.wikidata.org/wiki/Q5338467","display_name":"Edit distance","level":2,"score":0.4704671800136566},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4659821391105652},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4588914215564728},{"id":"https://openalex.org/C113174947","wikidata":"https://www.wikidata.org/wiki/Q2859736","display_name":"Tree (set theory)","level":2,"score":0.455073744058609},{"id":"https://openalex.org/C68859911","wikidata":"https://www.wikidata.org/wiki/Q1503724","display_name":"Pattern matching","level":2,"score":0.4281703531742096},{"id":"https://openalex.org/C311688","wikidata":"https://www.wikidata.org/wiki/Q2393193","display_name":"Time complexity","level":2,"score":0.4224262237548828},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3844013214111328},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1762717366218567},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.09520089626312256},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.07661589980125427},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C37914503","wikidata":"https://www.wikidata.org/wiki/Q156495","display_name":"Mathematical physics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/1183614.1183623","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1183614.1183623","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 15th ACM international conference on Information and knowledge management  - CIKM '06","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W22160234","https://openalex.org/W47279479","https://openalex.org/W1493310097","https://openalex.org/W1541858452","https://openalex.org/W1546029355","https://openalex.org/W1559144499","https://openalex.org/W1660390307","https://openalex.org/W1901493722","https://openalex.org/W1976978162","https://openalex.org/W2001496424","https://openalex.org/W2010595692","https://openalex.org/W2011632873","https://openalex.org/W2022146129","https://openalex.org/W2043481183","https://openalex.org/W2047203512","https://openalex.org/W2052899946","https://openalex.org/W2075798838","https://openalex.org/W2080141381","https://openalex.org/W2085763602","https://openalex.org/W2093168600","https://openalex.org/W2102443632","https://openalex.org/W2135494327","https://openalex.org/W2145130412","https://openalex.org/W2155818555","https://openalex.org/W2176553566","https://openalex.org/W2296273095","https://openalex.org/W2415381616"],"related_works":["https://openalex.org/W2136519138","https://openalex.org/W52396946","https://openalex.org/W2902482624","https://openalex.org/W2902555313","https://openalex.org/W2376315482","https://openalex.org/W4249054680","https://openalex.org/W3112977702","https://openalex.org/W2218650848","https://openalex.org/W2366227422","https://openalex.org/W2399644331"],"abstract_inverted_index":{"Document":[0],"generation":[1],"from":[2],"low":[3],"level":[4,92],"data":[5],"and":[6,38,125,155],"its":[7],"utilization":[8,30],"is":[9,22,75,134,176],"one":[10],"of":[11,44,63,86,114,122,130,143,160,163],"the":[12,27,61,72,87,90,115,123,131,141,146,149,153,158,161,164,173],"most":[13],"challenging":[14],"tasks":[15],"in":[16,26,66,89,108,119,145],"document":[17,29],"engineering.":[18],"Word":[19],"occurrence":[20],"detection":[21],"a":[23,33,42,48,67,78,84,183],"fundamental":[24],"problem":[25],"recognized":[28],"obtained":[31],"by":[32,77],"recognizer,":[34],"such":[35,46],"as":[36,47,93,95],"OCR":[37],"speech":[39],"recognition.":[40],"Given":[41],"set":[43,151],"words,":[45],"dictionary,":[49],"this":[50,70],"paper":[51,171],"proposes":[52],"an":[53],"efficient":[54],"dynamic":[55],"programming":[56],"(DP)":[57],"algorithm":[58,101,133,175],"to":[59,105,110],"find":[60],"occurrences":[62],"each":[64],"word":[65,150],"text.":[68],"In":[69],"paper,":[71],"string":[73,168],"similarity":[74,80],"measured":[76],"statistical":[79],"model":[81,165],"that":[82],"enables":[83],"definition":[85],"similarities":[88,107,113],"character":[91],"well":[94],"edit":[96],"operation":[97],"level.":[98],"The":[99,127],"proposed":[100,132,174],"uses":[102],"tree":[103],"structures":[104],"measure":[106],"order":[109],"avoid":[111],"measuring":[112],"same":[116],"substrings":[117],"appearing":[118],"different":[120],"parts":[121],"text":[124],"words.":[126],"time":[128],"complexity":[129],"O(|W|\u22c5|S|\u22c5|Q|),":[135],"where":[136],"|W|":[137],"(resp.":[138,152],"|S|)":[139],"denote":[140],"number":[142,159],"nodes":[144],"trees":[147],"representing":[148],"text),":[154],"|Q|":[156],"donotes":[157],"states":[162],"used":[166],"for":[167],"similarity.":[169],"This":[170],"shows":[172],"experimentally":[177],"about":[178],"six":[179],"times":[180],"faster":[181],"than":[182],"naive":[184],"DP":[185],"algorithm.":[186]},"counts_by_year":[{"year":2016,"cited_by_count":1},{"year":2013,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
