{"id":"https://openalex.org/W2132714218","doi":"https://doi.org/10.3115/1075096.1075102","title":"Generalized algorithms for constructing statistical language models","display_name":"Generalized algorithms for constructing statistical language models","publication_year":2003,"publication_date":"2003-01-01","ids":{"openalex":"https://openalex.org/W2132714218","doi":"https://doi.org/10.3115/1075096.1075102","mag":"2132714218"},"language":"en","primary_location":{"id":"doi:10.3115/1075096.1075102","is_oa":true,"landing_page_url":"https://doi.org/10.3115/1075096.1075102","pdf_url":"https://dl.acm.org/doi/pdf/10.3115/1075096.1075102","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 41st Annual Meeting on Association for Computational Linguistics  - ACL '03","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.3115/1075096.1075102","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5030888546","display_name":"Cyril Allauzen","orcid":null},"institutions":[{"id":"https://openalex.org/I1283103587","display_name":"AT&T (United States)","ror":"https://ror.org/02bbd5539","country_code":"US","type":"company","lineage":["https://openalex.org/I1283103587"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Cyril Allauzen","raw_affiliation_strings":["AT&T Labs - Research, Florham Park, NJ","AT&T Labs\u2014\u2014Research, Florham Park, NJ#TAB#"],"affiliations":[{"raw_affiliation_string":"AT&T Labs - Research, Florham Park, NJ","institution_ids":["https://openalex.org/I1283103587"]},{"raw_affiliation_string":"AT&T Labs\u2014\u2014Research, Florham Park, NJ#TAB#","institution_ids":["https://openalex.org/I1283103587"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058849006","display_name":"Mehryar Mohri","orcid":"https://orcid.org/0000-0002-3987-9847"},"institutions":[{"id":"https://openalex.org/I1283103587","display_name":"AT&T (United States)","ror":"https://ror.org/02bbd5539","country_code":"US","type":"company","lineage":["https://openalex.org/I1283103587"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mehryar Mohri","raw_affiliation_strings":["AT&T Labs - Research, Florham Park, NJ","AT&T Labs\u2014\u2014Research, Florham Park, NJ#TAB#"],"affiliations":[{"raw_affiliation_string":"AT&T Labs - Research, Florham Park, NJ","institution_ids":["https://openalex.org/I1283103587"]},{"raw_affiliation_string":"AT&T Labs\u2014\u2014Research, Florham Park, NJ#TAB#","institution_ids":["https://openalex.org/I1283103587"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5020068498","display_name":"Brian Roark","orcid":null},"institutions":[{"id":"https://openalex.org/I1283103587","display_name":"AT&T (United States)","ror":"https://ror.org/02bbd5539","country_code":"US","type":"company","lineage":["https://openalex.org/I1283103587"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Brian Roark","raw_affiliation_strings":["AT&T Labs - Research, Florham Park, NJ","AT&T Labs\u2014\u2014Research, Florham Park, NJ#TAB#"],"affiliations":[{"raw_affiliation_string":"AT&T Labs - Research, Florham Park, NJ","institution_ids":["https://openalex.org/I1283103587"]},{"raw_affiliation_string":"AT&T Labs\u2014\u2014Research, Florham Park, NJ#TAB#","institution_ids":["https://openalex.org/I1283103587"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5030888546"],"corresponding_institution_ids":["https://openalex.org/I1283103587"],"apc_list":null,"apc_paid":null,"fwci":11.2311,"has_fulltext":true,"cited_by_count":126,"citation_normalized_percentile":{"value":0.9839965,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"1","issue":null,"first_page":"40","last_page":"47"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11567","display_name":"semigroups and automata theory","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8492794036865234},{"id":"https://openalex.org/keywords/automaton","display_name":"Automaton","score":0.602213978767395},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.5802461504936218},{"id":"https://openalex.org/keywords/cache-language-model","display_name":"Cache language model","score":0.5581251978874207},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.5204901099205017},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.49565398693084717},{"id":"https://openalex.org/keywords/n-gram","display_name":"n-gram","score":0.46953070163726807},{"id":"https://openalex.org/keywords/grammar","display_name":"Grammar","score":0.44528335332870483},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.4328067898750305},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.4306502640247345},{"id":"https://openalex.org/keywords/grammar-induction","display_name":"Grammar induction","score":0.41935741901397705},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.415945440530777},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.390494704246521},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.35629770159721375},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.32187163829803467},{"id":"https://openalex.org/keywords/rule-based-machine-translation","display_name":"Rule-based machine translation","score":0.31424185633659363},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.22401252388954163},{"id":"https://openalex.org/keywords/universal-networking-language","display_name":"Universal Networking Language","score":0.142393559217453}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8492794036865234},{"id":"https://openalex.org/C112505250","wikidata":"https://www.wikidata.org/wiki/Q787116","display_name":"Automaton","level":2,"score":0.602213978767395},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.5802461504936218},{"id":"https://openalex.org/C39608478","wikidata":"https://www.wikidata.org/wiki/Q5015979","display_name":"Cache language model","level":5,"score":0.5581251978874207},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.5204901099205017},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.49565398693084717},{"id":"https://openalex.org/C117884012","wikidata":"https://www.wikidata.org/wiki/Q94489","display_name":"n-gram","level":3,"score":0.46953070163726807},{"id":"https://openalex.org/C26022165","wikidata":"https://www.wikidata.org/wiki/Q8091","display_name":"Grammar","level":2,"score":0.44528335332870483},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.4328067898750305},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.4306502640247345},{"id":"https://openalex.org/C56601403","wikidata":"https://www.wikidata.org/wiki/Q5593673","display_name":"Grammar induction","level":3,"score":0.41935741901397705},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.415945440530777},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.390494704246521},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.35629770159721375},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.32187163829803467},{"id":"https://openalex.org/C53893814","wikidata":"https://www.wikidata.org/wiki/Q7378909","display_name":"Rule-based machine translation","level":2,"score":0.31424185633659363},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.22401252388954163},{"id":"https://openalex.org/C83479923","wikidata":"https://www.wikidata.org/wiki/Q2063748","display_name":"Universal Networking Language","level":4,"score":0.142393559217453},{"id":"https://openalex.org/C129353971","wikidata":"https://www.wikidata.org/wiki/Q5156949","display_name":"Comprehension approach","level":3,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.3115/1075096.1075102","is_oa":true,"landing_page_url":"https://doi.org/10.3115/1075096.1075102","pdf_url":"https://dl.acm.org/doi/pdf/10.3115/1075096.1075102","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 41st Annual Meeting on Association for Computational Linguistics  - ACL '03","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.2.4842","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.2.4842","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://acl.ldc.upenn.edu/acl2003/main/pdf/Allauzen.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.5.9212","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.5.9212","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.research.att.com/~allauzen/postscript/grm4.ps","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.7.6493","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.7.6493","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.research.att.com/~mohri/postscript/grm4.ps","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.9.3642","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.9.3642","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://acl.ldc.upenn.edu/acl2003/main/pdfs/Allauzen.pdf","raw_type":"text"}],"best_oa_location":{"id":"doi:10.3115/1075096.1075102","is_oa":true,"landing_page_url":"https://doi.org/10.3115/1075096.1075102","pdf_url":"https://dl.acm.org/doi/pdf/10.3115/1075096.1075102","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 41st Annual Meeting on Association for Computational Linguistics  - ACL '03","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.6899999976158142,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2132714218.pdf","grobid_xml":"https://content.openalex.org/works/W2132714218.grobid-xml"},"referenced_works_count":21,"referenced_works":["https://openalex.org/W36566811","https://openalex.org/W1506437980","https://openalex.org/W1513168562","https://openalex.org/W1519138166","https://openalex.org/W1578332044","https://openalex.org/W1597533204","https://openalex.org/W1607161872","https://openalex.org/W1797288984","https://openalex.org/W1860547055","https://openalex.org/W1903115690","https://openalex.org/W1984151763","https://openalex.org/W2075201173","https://openalex.org/W2100000146","https://openalex.org/W2121227244","https://openalex.org/W2125528794","https://openalex.org/W2125529971","https://openalex.org/W2134237567","https://openalex.org/W2144462324","https://openalex.org/W2158195707","https://openalex.org/W4210678036","https://openalex.org/W4302312245"],"related_works":["https://openalex.org/W2787311093","https://openalex.org/W4255155614","https://openalex.org/W3203715570","https://openalex.org/W189245980","https://openalex.org/W2897427480","https://openalex.org/W12369754","https://openalex.org/W134198950","https://openalex.org/W2147879411","https://openalex.org/W2624072012","https://openalex.org/W2169518243"],"abstract_inverted_index":{"Recent":[0],"text":[1,161],"and":[2,12,25,31,40,105,112,116,141,162],"speech":[3,8,67],"processing":[4,164],"applications":[5],"such":[6],"as":[7],"mining":[9],"raise":[10],"new":[11,30,76],"more":[13,37,117],"general":[14,38,118,148],"problems":[15,39],"related":[16],"to":[17,34,129],"the":[18,54,154],"construction":[19],"of":[20,57,82,101,138],"language":[21,84,123,152],"models.":[22],"We":[23,47],"present":[24,113],"describe":[26,74],"in":[27,60,146],"detail":[28],"several":[29],"efficient":[32,136],"algorithms":[33,140],"address":[35],"these":[36],"report":[41],"experimental":[42],"results":[43],"demonstrating":[44],"their":[45],"usefulness.":[46],"give":[48],"an":[49,106,131],"algorithm":[50],"for":[51,78,93,97,120,151],"computing":[52],"efficiently":[53],"expected":[55],"counts":[56],"any":[58,70],"sequence":[59],"a":[61,66,75,98,114,147],"word":[62],"lattice":[63],"output":[64],"by":[65,86],"recognizer":[68],"or":[69],"arbitrary":[71,132],"weighted":[72,87,133],"automaton;":[73],"technique":[77,119],"creating":[79],"exact":[80],"representations":[81],"n-gram":[83,107],"models":[85,124],"automata":[88],"whose":[89],"size":[90,100],"is":[91],"practical":[92],"offline":[94],"use":[95],"even":[96],"vocabulary":[99],"about":[102],"500,000":[103],"words":[104],"order":[108],"n":[109],"=":[110],"6;":[111],"simple":[115],"constructing":[121],"class-based":[122],"that":[125,157],"allows":[126],"each":[127],"class":[128],"represent":[130],"automaton.":[134],"An":[135],"implementation":[137],"our":[139],"techniques":[142],"has":[143],"been":[144],"incorporated":[145],"software":[149],"library":[150],"modeling,":[153],"GRM":[155],"Library,":[156],"includes":[158],"many":[159],"other":[160],"grammar":[163],"functionalities.":[165]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":6},{"year":2016,"cited_by_count":2},{"year":2015,"cited_by_count":10},{"year":2014,"cited_by_count":5},{"year":2013,"cited_by_count":7},{"year":2012,"cited_by_count":12}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
