{"id":"https://openalex.org/W2963766645","doi":"https://doi.org/10.1162/tacl_a_00112","title":"Fast, Small and Exact: Infinite-order Language Modeling with Compressed Suffix Trees","display_name":"Fast, Small and Exact: Infinite-order Language Modeling with Compressed Suffix Trees","publication_year":2017,"publication_date":"2017-01-11","ids":{"openalex":"https://openalex.org/W2963766645","doi":"https://doi.org/10.1162/tacl_a_00112","mag":"2963766645"},"language":"en","primary_location":{"id":"pmh:oai:jupiter.its.unimelb.edu.au:11343/123254","is_oa":true,"landing_page_url":"http://hdl.handle.net/11343/123254","pdf_url":"http://hdl.handle.net/11343/123254","source":{"id":"https://openalex.org/S4377196259","display_name":"Minerva Access (University of Melbourne)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I165779595","host_organization_name":"The University of Melbourne","host_organization_lineage":["https://openalex.org/I165779595"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Journal Article"},"type":"article","indexed_in":[],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://hdl.handle.net/11343/123254","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5086032589","display_name":"Ehsan Shareghi","orcid":null},"institutions":[{"id":"https://openalex.org/I56590836","display_name":"Monash University","ror":"https://ror.org/02bfwt286","country_code":"AU","type":"education","lineage":["https://openalex.org/I56590836"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Shareghi, E","raw_affiliation_strings":["Faculty of Information Technology , Monash University Computing and Information Systems , The University of Melbourne","Faculty of Information Technology, Monash University,"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Faculty of Information Technology , Monash University Computing and Information Systems , The University of Melbourne","institution_ids":["https://openalex.org/I56590836"]},{"raw_affiliation_string":"Faculty of Information Technology, Monash University,","institution_ids":["https://openalex.org/I56590836"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086980890","display_name":"Matthias Petri","orcid":"https://orcid.org/0000-0002-0054-9429"},"institutions":[{"id":"https://openalex.org/I165779595","display_name":"The University of Melbourne","ror":"https://ror.org/01ej9dk98","country_code":"AU","type":"education","lineage":["https://openalex.org/I165779595"]},{"id":"https://openalex.org/I56590836","display_name":"Monash University","ror":"https://ror.org/02bfwt286","country_code":"AU","type":"education","lineage":["https://openalex.org/I56590836"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Petri, M","raw_affiliation_strings":["Faculty of Information Technology , Monash University Computing and Information Systems , The University of Melbourne","Computing and Information Systems, The University of Melbourne,"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Faculty of Information Technology , Monash University Computing and Information Systems , The University of Melbourne","institution_ids":["https://openalex.org/I56590836"]},{"raw_affiliation_string":"Computing and Information Systems, The University of Melbourne,","institution_ids":["https://openalex.org/I165779595"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081525024","display_name":"Gholamreza Haffari","orcid":"https://orcid.org/0000-0001-7326-8380"},"institutions":[{"id":"https://openalex.org/I56590836","display_name":"Monash University","ror":"https://ror.org/02bfwt286","country_code":"AU","type":"education","lineage":["https://openalex.org/I56590836"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Haffari, G","raw_affiliation_strings":["Faculty of Information Technology , Monash University Computing and Information Systems , The University of Melbourne","Faculty of Information Technology, Monash University,"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Faculty of Information Technology , Monash University Computing and Information Systems , The University of Melbourne","institution_ids":["https://openalex.org/I56590836"]},{"raw_affiliation_string":"Faculty of Information Technology, Monash University,","institution_ids":["https://openalex.org/I56590836"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5078530959","display_name":"Trevor Cohn","orcid":"https://orcid.org/0000-0003-4363-1673"},"institutions":[{"id":"https://openalex.org/I165779595","display_name":"The University of Melbourne","ror":"https://ror.org/01ej9dk98","country_code":"AU","type":"education","lineage":["https://openalex.org/I165779595"]},{"id":"https://openalex.org/I56590836","display_name":"Monash University","ror":"https://ror.org/02bfwt286","country_code":"AU","type":"education","lineage":["https://openalex.org/I56590836"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Cohn, T","raw_affiliation_strings":["Faculty of Information Technology , Monash University Computing and Information Systems , The University of Melbourne","Computing and Information Systems, The University of Melbourne,"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Faculty of Information Technology , Monash University Computing and Information Systems , The University of Melbourne","institution_ids":["https://openalex.org/I56590836"]},{"raw_affiliation_string":"Computing and Information Systems, The University of Melbourne,","institution_ids":["https://openalex.org/I165779595"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5078530959","https://openalex.org/A5081525024","https://openalex.org/A5086032589","https://openalex.org/A5086980890"],"corresponding_institution_ids":["https://openalex.org/I165779595","https://openalex.org/I56590836"],"apc_list":null,"apc_paid":null,"fwci":5.82,"has_fulltext":true,"cited_by_count":29,"citation_normalized_percentile":{"value":0.96846591,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.9257066249847412},{"id":"https://openalex.org/keywords/suffix","display_name":"Suffix","score":0.7335612773895264},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5250617861747742},{"id":"https://openalex.org/keywords/memory-footprint","display_name":"Memory footprint","score":0.4617905914783478},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4535067081451416},{"id":"https://openalex.org/keywords/suffix-tree","display_name":"Suffix tree","score":0.4431527853012085},{"id":"https://openalex.org/keywords/markov-chain","display_name":"Markov chain","score":0.4431299567222595},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4206288754940033},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.38006168603897095},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3495895564556122},{"id":"https://openalex.org/keywords/data-structure","display_name":"Data structure","score":0.32714203000068665},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.29757899045944214},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.1431272029876709}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9257066249847412},{"id":"https://openalex.org/C2779804580","wikidata":"https://www.wikidata.org/wiki/Q102047","display_name":"Suffix","level":2,"score":0.7335612773895264},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5250617861747742},{"id":"https://openalex.org/C74912251","wikidata":"https://www.wikidata.org/wiki/Q6815727","display_name":"Memory footprint","level":2,"score":0.4617905914783478},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4535067081451416},{"id":"https://openalex.org/C2781166958","wikidata":"https://www.wikidata.org/wiki/Q1426863","display_name":"Suffix tree","level":3,"score":0.4431527853012085},{"id":"https://openalex.org/C98763669","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov chain","level":2,"score":0.4431299567222595},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4206288754940033},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.38006168603897095},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3495895564556122},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.32714203000068665},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.29757899045944214},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.1431272029876709},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"pmh:oai:jupiter.its.unimelb.edu.au:11343/123254","is_oa":true,"landing_page_url":"http://hdl.handle.net/11343/123254","pdf_url":"http://hdl.handle.net/11343/123254","source":{"id":"https://openalex.org/S4377196259","display_name":"Minerva Access (University of Melbourne)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I165779595","host_organization_name":"The University of Melbourne","host_organization_lineage":["https://openalex.org/I165779595"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Journal Article"}],"best_oa_location":{"id":"pmh:oai:jupiter.its.unimelb.edu.au:11343/123254","is_oa":true,"landing_page_url":"http://hdl.handle.net/11343/123254","pdf_url":"http://hdl.handle.net/11343/123254","source":{"id":"https://openalex.org/S4377196259","display_name":"Minerva Access (University of Melbourne)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I165779595","host_organization_name":"The University of Melbourne","host_organization_lineage":["https://openalex.org/I165779595"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Journal Article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.7599999904632568}],"awards":[],"funders":[{"id":"https://openalex.org/F4320310369","display_name":"National ICT Australia","ror":"https://ror.org/03q397159"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2963766645.pdf","grobid_xml":"https://content.openalex.org/works/W2963766645.grobid-xml"},"referenced_works_count":31,"referenced_works":["https://openalex.org/W22168010","https://openalex.org/W87260881","https://openalex.org/W126222424","https://openalex.org/W1549037892","https://openalex.org/W1551461028","https://openalex.org/W1631260214","https://openalex.org/W1962019683","https://openalex.org/W1982042804","https://openalex.org/W1985174631","https://openalex.org/W2057147815","https://openalex.org/W2086536051","https://openalex.org/W2106540279","https://openalex.org/W2109664771","https://openalex.org/W2122429665","https://openalex.org/W2134696992","https://openalex.org/W2134800885","https://openalex.org/W2149243190","https://openalex.org/W2154099718","https://openalex.org/W2158195707","https://openalex.org/W2158322625","https://openalex.org/W2158874082","https://openalex.org/W2161488606","https://openalex.org/W2171913306","https://openalex.org/W2172097231","https://openalex.org/W2189576523","https://openalex.org/W2250193734","https://openalex.org/W2250653840","https://openalex.org/W2250988012","https://openalex.org/W2295986819","https://openalex.org/W2533248932","https://openalex.org/W2951714314"],"related_works":["https://openalex.org/W96331545","https://openalex.org/W1548907175","https://openalex.org/W1882920571","https://openalex.org/W2583658747","https://openalex.org/W2159942118","https://openalex.org/W1517600056","https://openalex.org/W2160738675","https://openalex.org/W2036633468","https://openalex.org/W2396668937","https://openalex.org/W2003608043"],"abstract_inverted_index":{"Efficient":[0],"methods":[1],"for":[2,8],"storing":[3],"and":[4,33,70,76,101],"querying":[5],"are":[6,105],"critical":[7],"scaling":[9],"high-order":[10],"m-gram":[11],"language":[12,20,46],"models":[13],"to":[14,59],"large":[15,74],"corpora.":[16],"We":[17,50],"propose":[18],"a":[19,27,64],"model":[21,47],"based":[22],"on":[23],"compressed":[24],"suffix":[25],"trees,":[26],"representation":[28],"that":[29,104],"is":[30,82],"highly":[31,83],"compact":[32],"can":[34],"be":[35],"easily":[36],"held":[37],"in":[38,44,67],"memory,":[39],"while":[40],"supporting":[41],"queries":[42],"needed":[43],"computing":[45],"probabilities":[48],"on-the-fly.":[49],"present":[51],"several":[52],"optimisations":[53],"which":[54],"improve":[55],"query":[56],"runtimes":[57,103],"up":[58],"2500\u00d7,":[60],"despite":[61],"only":[62],"incurring":[63],"modest":[65],"increase":[66],"construction":[68],"time":[69],"memory":[71,94],"usage.":[72],"For":[73],"corpora":[75],"high":[77],"Markov":[78],"orders,":[79],"our":[80],"method":[81],"competitive":[84],"with":[85],"the":[86],"state-of-the-art":[87],"KenLM":[88],"package.":[89],"It":[90],"imposes":[91],"much":[92],"lower":[93],"requirements,":[95],"often":[96],"by":[97],"orders":[98],"of":[99],"magnitude,":[100],"has":[102],"either":[106],"similar":[107],"(for":[108,112],"training)":[109],"or":[110],"comparable":[111],"querying).":[113]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2020,"cited_by_count":22},{"year":2019,"cited_by_count":3},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":1}],"updated_date":"2026-04-28T14:05:53.105641","created_date":"2025-10-10T00:00:00"}
