{"id":"https://openalex.org/W2011106203","doi":"https://doi.org/10.1145/1458082.1458138","title":"TinyLex","display_name":"TinyLex","publication_year":2008,"publication_date":"2008-10-26","ids":{"openalex":"https://openalex.org/W2011106203","doi":"https://doi.org/10.1145/1458082.1458138","mag":"2011106203"},"language":"en","primary_location":{"id":"doi:10.1145/1458082.1458138","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1458082.1458138","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 17th ACM conference on Information and knowledge management","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5036108603","display_name":"Derrick Coetzee","orcid":null},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Derrick Coetzee","raw_affiliation_strings":["Microsoft Research, Microsoft Corporation, Redmond, WA, USA"],"affiliations":[{"raw_affiliation_string":"Microsoft Research, Microsoft Corporation, Redmond, WA, USA","institution_ids":["https://openalex.org/I1290206253"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5036108603"],"corresponding_institution_ids":["https://openalex.org/I1290206253"],"apc_list":null,"apc_paid":null,"fwci":2.2882,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.89372132,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"409","last_page":"418"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9919999837875366,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/substring","display_name":"Substring","score":0.8007566332817078},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7788565158843994},{"id":"https://openalex.org/keywords/n-gram","display_name":"n-gram","score":0.6712950468063354},{"id":"https://openalex.org/keywords/index","display_name":"Index (typography)","score":0.5703659057617188},{"id":"https://openalex.org/keywords/pruning","display_name":"Pruning","score":0.5084399580955505},{"id":"https://openalex.org/keywords/inverted-index","display_name":"Inverted index","score":0.5041970014572144},{"id":"https://openalex.org/keywords/lexicon","display_name":"Lexicon","score":0.4868423342704773},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.45671218633651733},{"id":"https://openalex.org/keywords/phrase","display_name":"Phrase","score":0.4380251467227936},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.43542978167533875},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.37715503573417664},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3756391704082489},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.28940993547439575},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.21936854720115662},{"id":"https://openalex.org/keywords/data-structure","display_name":"Data structure","score":0.21134477853775024},{"id":"https://openalex.org/keywords/search-engine-indexing","display_name":"Search engine indexing","score":0.15253689885139465}],"concepts":[{"id":"https://openalex.org/C182407805","wikidata":"https://www.wikidata.org/wiki/Q2626534","display_name":"Substring","level":3,"score":0.8007566332817078},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7788565158843994},{"id":"https://openalex.org/C117884012","wikidata":"https://www.wikidata.org/wiki/Q94489","display_name":"n-gram","level":3,"score":0.6712950468063354},{"id":"https://openalex.org/C2777382242","wikidata":"https://www.wikidata.org/wiki/Q6017816","display_name":"Index (typography)","level":2,"score":0.5703659057617188},{"id":"https://openalex.org/C108010975","wikidata":"https://www.wikidata.org/wiki/Q500094","display_name":"Pruning","level":2,"score":0.5084399580955505},{"id":"https://openalex.org/C130590232","wikidata":"https://www.wikidata.org/wiki/Q1671754","display_name":"Inverted index","level":3,"score":0.5041970014572144},{"id":"https://openalex.org/C2778121359","wikidata":"https://www.wikidata.org/wiki/Q8096","display_name":"Lexicon","level":2,"score":0.4868423342704773},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.45671218633651733},{"id":"https://openalex.org/C2776224158","wikidata":"https://www.wikidata.org/wiki/Q187931","display_name":"Phrase","level":2,"score":0.4380251467227936},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.43542978167533875},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.37715503573417664},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3756391704082489},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.28940993547439575},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.21936854720115662},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.21134477853775024},{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.15253689885139465},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C6557445","wikidata":"https://www.wikidata.org/wiki/Q173113","display_name":"Agronomy","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/1458082.1458138","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1458082.1458138","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 17th ACM conference on Information and knowledge management","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6200000047683716,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W72059462","https://openalex.org/W125979907","https://openalex.org/W148400104","https://openalex.org/W1490578079","https://openalex.org/W1516636695","https://openalex.org/W1529205966","https://openalex.org/W1559631118","https://openalex.org/W1797288984","https://openalex.org/W1860144983","https://openalex.org/W1983437400","https://openalex.org/W2006608770","https://openalex.org/W2006997130","https://openalex.org/W2008434289","https://openalex.org/W2010027198","https://openalex.org/W2022292926","https://openalex.org/W2033943297","https://openalex.org/W2068066080","https://openalex.org/W2068143774","https://openalex.org/W2119878143","https://openalex.org/W2123845384","https://openalex.org/W2128016314","https://openalex.org/W2135290016","https://openalex.org/W2163652601","https://openalex.org/W2168909179","https://openalex.org/W2992267273","https://openalex.org/W4285719527"],"related_works":["https://openalex.org/W2149530645","https://openalex.org/W2765525826","https://openalex.org/W2141389247","https://openalex.org/W1981732427","https://openalex.org/W2357929520","https://openalex.org/W2184716188","https://openalex.org/W2141732239","https://openalex.org/W2038618507","https://openalex.org/W2101881908","https://openalex.org/W416633688"],"abstract_inverted_index":{"Inverted":[0],"indexes":[1],"using":[2,186],"sequences":[3],"of":[4,31,40,81,85,90,93,102,117,172],"characters":[5,190],"(n-grams)":[6],"as":[7,191],"terms":[8,92],"provide":[9],"an":[10,103],"error-resilient":[11],"and":[12,20,53,119,165,193],"language-independent":[13],"way":[14],"to":[15,111,157,166,196],"query":[16,47,60,128],"for":[17,145],"arbitrary":[18],"substrings":[19],"perform":[21],"approximate":[22],"matching":[23],"in":[24,71,123,136],"a":[25,29,36],"text,":[26],"but":[27],"present":[28],"number":[30,39],"practical":[32],"problems:":[33],"they":[34,42,54],"have":[35],"very":[37,58],"large":[38],"terms,":[41],"exhibit":[43],"pathologically":[44],"expensive":[45],"worst-case":[46,160],"times":[48,113],"on":[49,162,170],"certain":[50],"natural":[51],"inputs,":[52],"cannot":[55],"cope":[56],"with":[57],"short":[59],"strings.":[61],"In":[62],"word-based":[63],"indexes,":[64],"static":[65],"index":[66,73,105,125,156,185],"pruning":[67],"has":[68],"been":[69],"successful":[70],"reducing":[72],"size":[74,101,126],"while":[75],"maintaining":[76],"precision,":[77],"at":[78],"the":[79,86,99,131,142,152,181],"expense":[80],"recall.":[82],"Taking":[83],"advantage":[84],"unique":[87],"inclusion":[88],"structure":[89],"n-gram":[91,104,155,184],"different":[94],"lengths,":[95],"we":[96,178],"show":[97],"that":[98],"lexicon":[100,132],"can":[106],"be":[107],"reduced":[108],"by":[109],"7":[110],"15":[112],"without":[114,120],"any":[115,121,173],"loss":[116],"recall,":[118],"increase":[122],"either":[124],"or":[127],"time.":[129],"Because":[130],"is":[133,150],"typically":[134],"stored":[135],"main":[137],"memory,":[138],"this":[139,176],"substantially":[140],"reduces":[141],"memory":[143],"required":[144],"queries.":[146],"Simultaneously,":[147],"our":[148],"construction":[149],"also":[151,179],"first":[153,182],"overlapping":[154],"place":[158],"tunable":[159],"bounds":[161],"false":[163],"positives":[164],"permit":[167],"efficient":[168],"queries":[169],"strings":[171],"length.":[174],"Using":[175],"construction,":[177],"demonstrate":[180],"feasible":[183],"words":[187],"rather":[188],"than":[189],"units,":[192],"its":[194],"applications":[195],"phrase":[197],"searching.":[198]},"counts_by_year":[{"year":2015,"cited_by_count":1},{"year":2013,"cited_by_count":2},{"year":2012,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2016-06-24T00:00:00"}
