{"id":"https://openalex.org/W2133843345","doi":"https://doi.org/10.1017/s1351324911000349","title":"A fast and flexible architecture for very large word n-gram datasets","display_name":"A fast and flexible architecture for very large word n-gram datasets","publication_year":2012,"publication_date":"2012-01-10","ids":{"openalex":"https://openalex.org/W2133843345","doi":"https://doi.org/10.1017/s1351324911000349","mag":"2133843345"},"language":"en","primary_location":{"id":"doi:10.1017/s1351324911000349","is_oa":false,"landing_page_url":"https://doi.org/10.1017/s1351324911000349","pdf_url":null,"source":{"id":"https://openalex.org/S18088403","display_name":"Natural Language Engineering","issn_l":"1351-3249","issn":["1351-3249","1469-8110"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311721","host_organization_name":"Cambridge University Press","host_organization_lineage":["https://openalex.org/P4310311721","https://openalex.org/P4310311702"],"host_organization_lineage_names":["Cambridge University Press","University of Cambridge"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Natural Language Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5066547058","display_name":"Michael Flor","orcid":"https://orcid.org/0000-0002-3320-5729"},"institutions":[{"id":"https://openalex.org/I1341030882","display_name":"Educational Testing Service","ror":"https://ror.org/03b5q4637","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I1341030882"]},{"id":"https://openalex.org/I20089843","display_name":"Princeton University","ror":"https://ror.org/00hx57361","country_code":"US","type":"education","lineage":["https://openalex.org/I20089843"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"MICHAEL FLOR","raw_affiliation_strings":["NLP and Speech Group, Educational Testing Service, Princeton, NJ 08541, USA e-mail:","NLP and Speech Group, Educational Testing Service, Princeton, NJ 08541, USA"],"affiliations":[{"raw_affiliation_string":"NLP and Speech Group, Educational Testing Service, Princeton, NJ 08541, USA e-mail:","institution_ids":["https://openalex.org/I1341030882"]},{"raw_affiliation_string":"NLP and Speech Group, Educational Testing Service, Princeton, NJ 08541, USA","institution_ids":["https://openalex.org/I20089843"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5066547058"],"corresponding_institution_ids":["https://openalex.org/I1341030882","https://openalex.org/I20089843"],"apc_list":null,"apc_paid":null,"fwci":4.2813,"has_fulltext":false,"cited_by_count":11,"citation_normalized_percentile":{"value":0.94360906,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":"19","issue":"1","first_page":"61","last_page":"93"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9958999752998352,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.9240354895591736},{"id":"https://openalex.org/keywords/trie","display_name":"Trie","score":0.5605223774909973},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.5186304450035095},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.5173087120056152},{"id":"https://openalex.org/keywords/search-engine-indexing","display_name":"Search engine indexing","score":0.48019611835479736},{"id":"https://openalex.org/keywords/software-portability","display_name":"Software portability","score":0.4711386561393738},{"id":"https://openalex.org/keywords/flexibility","display_name":"Flexibility (engineering)","score":0.4244036078453064},{"id":"https://openalex.org/keywords/n-gram","display_name":"n-gram","score":0.42096590995788574},{"id":"https://openalex.org/keywords/memory-map","display_name":"Memory map","score":0.41718336939811707},{"id":"https://openalex.org/keywords/huffman-coding","display_name":"Huffman coding","score":0.4110109210014343},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.370635986328125},{"id":"https://openalex.org/keywords/data-structure","display_name":"Data structure","score":0.3167749345302582},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.2973390221595764},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.24495503306388855},{"id":"https://openalex.org/keywords/data-compression","display_name":"Data compression","score":0.24141010642051697},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.21750527620315552},{"id":"https://openalex.org/keywords/shared-memory","display_name":"Shared memory","score":0.20706531405448914}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9240354895591736},{"id":"https://openalex.org/C190290938","wikidata":"https://www.wikidata.org/wiki/Q387015","display_name":"Trie","level":3,"score":0.5605223774909973},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.5186304450035095},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.5173087120056152},{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.48019611835479736},{"id":"https://openalex.org/C63000827","wikidata":"https://www.wikidata.org/wiki/Q3080428","display_name":"Software portability","level":2,"score":0.4711386561393738},{"id":"https://openalex.org/C2780598303","wikidata":"https://www.wikidata.org/wiki/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.4244036078453064},{"id":"https://openalex.org/C117884012","wikidata":"https://www.wikidata.org/wiki/Q94489","display_name":"n-gram","level":3,"score":0.42096590995788574},{"id":"https://openalex.org/C74426580","wikidata":"https://www.wikidata.org/wiki/Q719484","display_name":"Memory map","level":3,"score":0.41718336939811707},{"id":"https://openalex.org/C46900642","wikidata":"https://www.wikidata.org/wiki/Q2647","display_name":"Huffman coding","level":3,"score":0.4110109210014343},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.370635986328125},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.3167749345302582},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.2973390221595764},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.24495503306388855},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.24141010642051697},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.21750527620315552},{"id":"https://openalex.org/C133875982","wikidata":"https://www.wikidata.org/wiki/Q764810","display_name":"Shared memory","level":2,"score":0.20706531405448914},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1017/s1351324911000349","is_oa":false,"landing_page_url":"https://doi.org/10.1017/s1351324911000349","pdf_url":null,"source":{"id":"https://openalex.org/S18088403","display_name":"Natural Language Engineering","issn_l":"1351-3249","issn":["1351-3249","1469-8110"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311721","host_organization_name":"Cambridge University Press","host_organization_lineage":["https://openalex.org/P4310311721","https://openalex.org/P4310311702"],"host_organization_lineage_names":["Cambridge University Press","University of Cambridge"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Natural Language Engineering","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":56,"referenced_works":["https://openalex.org/W87260881","https://openalex.org/W134039547","https://openalex.org/W162552777","https://openalex.org/W194374310","https://openalex.org/W195681115","https://openalex.org/W1501187080","https://openalex.org/W1518502857","https://openalex.org/W1544588226","https://openalex.org/W1554887429","https://openalex.org/W1562125942","https://openalex.org/W1574901103","https://openalex.org/W1631260214","https://openalex.org/W1740463855","https://openalex.org/W1993301141","https://openalex.org/W1998720920","https://openalex.org/W2000484009","https://openalex.org/W2000577984","https://openalex.org/W2017708149","https://openalex.org/W2065375678","https://openalex.org/W2069074882","https://openalex.org/W2099926694","https://openalex.org/W2106540279","https://openalex.org/W2109664771","https://openalex.org/W2112625711","https://openalex.org/W2114539183","https://openalex.org/W2115918455","https://openalex.org/W2122429665","https://openalex.org/W2123845384","https://openalex.org/W2125750666","https://openalex.org/W2128808215","https://openalex.org/W2134800885","https://openalex.org/W2140608987","https://openalex.org/W2153393503","https://openalex.org/W2155794909","https://openalex.org/W2158403176","https://openalex.org/W2169615869","https://openalex.org/W2169657490","https://openalex.org/W2171457693","https://openalex.org/W2171458318","https://openalex.org/W2171913306","https://openalex.org/W2172097231","https://openalex.org/W2337480916","https://openalex.org/W2489487449","https://openalex.org/W2554096094","https://openalex.org/W2602633191","https://openalex.org/W2604292070","https://openalex.org/W2606601345","https://openalex.org/W2608040521","https://openalex.org/W2615497679","https://openalex.org/W2618735189","https://openalex.org/W2741609678","https://openalex.org/W2930957955","https://openalex.org/W3014229491","https://openalex.org/W3143835353","https://openalex.org/W4285719527","https://openalex.org/W4389615663"],"related_works":["https://openalex.org/W2046569047","https://openalex.org/W2186419898","https://openalex.org/W2048294592","https://openalex.org/W2370961680","https://openalex.org/W2278452282","https://openalex.org/W2156393489","https://openalex.org/W1552925710","https://openalex.org/W207628907","https://openalex.org/W2740229587","https://openalex.org/W1786707430"],"abstract_inverted_index":{"Abstract":[0],"This":[1,96],"paper":[2],"presents":[3],"TrendStream":[4,21],",":[5],"a":[6,23,109],"versatile":[7],"architecture":[8,97],"for":[9,16,33,101],"very":[10,118],"large":[11],"word":[12],"n-gram":[13,85,138],"datasets.":[14],"Designed":[15],"speed,":[17],"flexibility,":[18],"and":[19,30,36,90,103,157],"portability,":[20],"uses":[22],"novel":[24],"trie-based":[25],"architecture,":[26],"features":[27],"lossless":[28],"compression,":[29],"provides":[31],"optimization":[32],"both":[34],"speed":[35],"memory":[37,104,120],"use.":[38],"In":[39],"addition":[40],"to":[41],"literal":[42],"queries,":[43],"it":[44],"also":[45],"supports":[46],"fast":[47,106],"pattern":[48],"matching":[49],"searches":[50],"(with":[51],"wildcards":[52],"or":[53,112],"regular":[54],"expressions),":[55],"on":[56,127],"the":[57,71,137,153,158],"same":[58],"data":[59,115],"structure,":[60],"without":[61],"any":[62],"additional":[63],"indexing.":[64],"Language":[65],"models":[66,86],"are":[67,150],"updateable":[68],"directly":[69],"in":[70],"compiled":[72,94],"binary":[73],"format,":[74],"allowing":[75],"rapid":[76],"encoding":[77],"of":[78,84,92,108],"existing":[79],"tabulated":[80],"collections,":[81],"incremental":[82],"generation":[83],"from":[87],"streaming":[88],"text,":[89],"merging":[91],"encoded":[93],"files.":[95],"offers":[98],"flexible":[99],"choices":[100],"loading":[102,116],"utilization:":[105],"memory-mapping":[107],"multi-gigabyte":[110],"model,":[111],"on-demand":[113],"partial":[114],"with":[117,152],"modest":[119],"requirements.":[121],"The":[122],"implemented":[123],"system":[124],"runs":[125],"successfully":[126],"several":[128],"different":[129,132],"platforms,":[130],"under":[131],"operating":[133],"systems,":[134],"even":[135],"when":[136],"model":[139],"file":[140],"is":[141],"much":[142],"larger":[143],"than":[144],"available":[145],"memory.":[146],"Experimental":[147],"evaluation":[148],"results":[149],"presented":[151],"Google":[154],"Web1T":[155],"collection":[156],"Gigaword":[159],"corpus.":[160]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2014,"cited_by_count":4},{"year":2013,"cited_by_count":3},{"year":2012,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
