{"id":"https://openalex.org/W2767040298","doi":"https://doi.org/10.1007/978-3-319-69805-2_30","title":"Frequency Consolidation Among Word N-Grams","display_name":"Frequency Consolidation Among Word N-Grams","publication_year":2017,"publication_date":"2017-01-01","ids":{"openalex":"https://openalex.org/W2767040298","doi":"https://doi.org/10.1007/978-3-319-69805-2_30","mag":"2767040298"},"language":"en","primary_location":{"id":"doi:10.1007/978-3-319-69805-2_30","is_oa":true,"landing_page_url":"https://doi.org/10.1007/978-3-319-69805-2_30","pdf_url":null,"source":{"id":"https://openalex.org/S106296714","display_name":"Lecture notes in computer science","issn_l":"0302-9743","issn":["0302-9743","1611-3349"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"book series"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Lecture Notes in Computer Science","raw_type":"book-chapter"},"type":"book-chapter","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.1007/978-3-319-69805-2_30","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5065966118","display_name":"Andreas Buerki","orcid":"https://orcid.org/0000-0003-2151-3246"},"institutions":[{"id":"https://openalex.org/I79510175","display_name":"Cardiff University","ror":"https://ror.org/03kk7td41","country_code":"GB","type":"education","lineage":["https://openalex.org/I79510175"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Andreas Buerki","raw_affiliation_strings":["Centre for Language and Communication Research, Cardiff University, Cardiff, Wales, UK"],"affiliations":[{"raw_affiliation_string":"Centre for Language and Communication Research, Cardiff University, Cardiff, Wales, UK","institution_ids":["https://openalex.org/I79510175"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5065966118"],"corresponding_institution_ids":["https://openalex.org/I79510175"],"apc_list":{"value":5000,"currency":"EUR","value_usd":5392},"apc_paid":{"value":5000,"currency":"EUR","value_usd":5392},"fwci":0.267,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.58363624,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"432","last_page":"446"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12090","display_name":"Language and cultural evolution","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/3316","display_name":"Cultural Studies"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/substring","display_name":"Substring","score":0.8468557000160217},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7459734678268433},{"id":"https://openalex.org/keywords/consolidation","display_name":"Consolidation (business)","score":0.7370705604553223},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.6875068545341492},{"id":"https://openalex.org/keywords/word-length","display_name":"Word length","score":0.6618157625198364},{"id":"https://openalex.org/keywords/word-lists-by-frequency","display_name":"Word lists by frequency","score":0.5671976804733276},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5156968235969543},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5137273073196411},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.45824310183525085},{"id":"https://openalex.org/keywords/n-gram","display_name":"n-gram","score":0.44220787286758423},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3642820417881012},{"id":"https://openalex.org/keywords/arithmetic","display_name":"Arithmetic","score":0.33029526472091675},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.17921188473701477},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.16743353009223938},{"id":"https://openalex.org/keywords/data-structure","display_name":"Data structure","score":0.11440810561180115},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.11341163516044617}],"concepts":[{"id":"https://openalex.org/C182407805","wikidata":"https://www.wikidata.org/wiki/Q2626534","display_name":"Substring","level":3,"score":0.8468557000160217},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7459734678268433},{"id":"https://openalex.org/C2776014549","wikidata":"https://www.wikidata.org/wiki/Q3050847","display_name":"Consolidation (business)","level":2,"score":0.7370705604553223},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.6875068545341492},{"id":"https://openalex.org/C3019641298","wikidata":"https://www.wikidata.org/wiki/Q625642","display_name":"Word length","level":2,"score":0.6618157625198364},{"id":"https://openalex.org/C175293574","wikidata":"https://www.wikidata.org/wiki/Q697133","display_name":"Word lists by frequency","level":3,"score":0.5671976804733276},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5156968235969543},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5137273073196411},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.45824310183525085},{"id":"https://openalex.org/C117884012","wikidata":"https://www.wikidata.org/wiki/Q94489","display_name":"n-gram","level":3,"score":0.44220787286758423},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3642820417881012},{"id":"https://openalex.org/C94375191","wikidata":"https://www.wikidata.org/wiki/Q11205","display_name":"Arithmetic","level":1,"score":0.33029526472091675},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.17921188473701477},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.16743353009223938},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.11440810561180115},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.11341163516044617},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C121955636","wikidata":"https://www.wikidata.org/wiki/Q4116214","display_name":"Accounting","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1007/978-3-319-69805-2_30","is_oa":true,"landing_page_url":"https://doi.org/10.1007/978-3-319-69805-2_30","pdf_url":null,"source":{"id":"https://openalex.org/S106296714","display_name":"Lecture notes in computer science","issn_l":"0302-9743","issn":["0302-9743","1611-3349"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"book series"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Lecture Notes in Computer Science","raw_type":"book-chapter"}],"best_oa_location":{"id":"doi:10.1007/978-3-319-69805-2_30","is_oa":true,"landing_page_url":"https://doi.org/10.1007/978-3-319-69805-2_30","pdf_url":null,"source":{"id":"https://openalex.org/S106296714","display_name":"Lecture notes in computer science","issn_l":"0302-9743","issn":["0302-9743","1611-3349"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"book series"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Lecture Notes in Computer Science","raw_type":"book-chapter"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":36,"referenced_works":["https://openalex.org/W65635455","https://openalex.org/W181196098","https://openalex.org/W620516646","https://openalex.org/W622624472","https://openalex.org/W1518902157","https://openalex.org/W1525289242","https://openalex.org/W1558866924","https://openalex.org/W1565569979","https://openalex.org/W1605120103","https://openalex.org/W1940278502","https://openalex.org/W1972686387","https://openalex.org/W1986535287","https://openalex.org/W2008434289","https://openalex.org/W2019096529","https://openalex.org/W2019649581","https://openalex.org/W2043322680","https://openalex.org/W2049107599","https://openalex.org/W2070205520","https://openalex.org/W2080579867","https://openalex.org/W2083199601","https://openalex.org/W2084053452","https://openalex.org/W2085313961","https://openalex.org/W2086677051","https://openalex.org/W2122566486","https://openalex.org/W2128192613","https://openalex.org/W2153232090","https://openalex.org/W2166522237","https://openalex.org/W2240448016","https://openalex.org/W2250578549","https://openalex.org/W2326282800","https://openalex.org/W2752672336","https://openalex.org/W2904082680","https://openalex.org/W2997757304","https://openalex.org/W4236595676","https://openalex.org/W4238067037","https://openalex.org/W4388152583"],"related_works":["https://openalex.org/W2141389247","https://openalex.org/W2184716188","https://openalex.org/W2141732239","https://openalex.org/W2913504480","https://openalex.org/W1482350068","https://openalex.org/W2031909249","https://openalex.org/W4304891330","https://openalex.org/W2075174955","https://openalex.org/W2326629528","https://openalex.org/W3098115713"],"abstract_inverted_index":{"This":[0],"paper":[1],"considers":[2],"the":[3,21,39,58,65,89,111],"issue":[4],"of":[5,10,41,45,61,67,75,92,98,101,110,113,118,136,140],"frequency":[6,59,125],"consolidation":[7,40,56,126],"in":[8,105,149],"lists":[9,44],"different":[11,46],"length":[12,47],"word":[13,17,93,131],"n-grams":[14,100,132],"(i.e.":[15],"recurrent":[16],"sequences)":[18],"extracted":[19],"from":[20,49],"same":[22],"underlying":[23,155],"corpus.":[24],"A":[25],"simple":[26],"algorithm":[27],"\u2013":[28,34],"enhanced":[29],"by":[30],"a":[31,82],"preparatory":[32],"stage":[33],"is":[35,134,157],"proposed":[36,122],"which":[37,104],"allows":[38,88,107],"frequencies":[42],"among":[43,130],"n-grams,":[48],"2-grams":[50],"to":[51,64,154],"6-grams":[52],"and":[53,87,127,133,143],"beyond.":[54],"The":[55,121],"adjusts":[57],"count":[60],"each":[62],"n-gram":[63,141],"number":[66],"its":[68,71],"occurrences":[69,72],"minus":[70],"as":[73],"part":[74,97],"longer":[76],"n-grams.":[77],"Among":[78],"other":[79],"uses,":[80],"such":[81],"procedure":[83,123],"aids":[84],"linguistic":[85],"analysis":[86],"non-inflationary":[90],"counting":[91],"tokens":[94],"that":[95],"are":[96],"frequent":[99],"various":[102],"lengths,":[103],"turn":[106],"an":[108],"assessment":[109],"proportion":[112],"running":[114],"text":[115],"made":[116],"up":[117],"recurring":[119],"chunks.":[120],"delivers":[124],"substring":[128],"reduction":[129],"independent":[135],"any":[137],"particular":[138],"method":[139],"extraction":[142],"filtering,":[144],"making":[145],"it":[146],"applicable":[147],"also":[148],"situations":[150],"where":[151],"full":[152],"access":[153],"corpora":[156],"unavailable.":[158]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2019,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
