{"id":"https://openalex.org/W2950568044","doi":"https://doi.org/10.1162/coli_a_00355","title":"Evaluating Computational Language Models with Scaling Properties of Natural Language","display_name":"Evaluating Computational Language Models with Scaling Properties of Natural Language","publication_year":2019,"publication_date":"2019-06-25","ids":{"openalex":"https://openalex.org/W2950568044","doi":"https://doi.org/10.1162/coli_a_00355","mag":"2950568044"},"language":"en","primary_location":{"id":"doi:10.1162/coli_a_00355","is_oa":true,"landing_page_url":"https://doi.org/10.1162/coli_a_00355","pdf_url":"https://direct.mit.edu/coli/article-pdf/45/3/481/1847468/coli_a_00355.pdf","source":{"id":"https://openalex.org/S155526855","display_name":"Computational Linguistics","issn_l":"0891-2017","issn":["0891-2017","1530-9312"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computational Linguistics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://direct.mit.edu/coli/article-pdf/45/3/481/1847468/coli_a_00355.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5058381648","display_name":"Shuntaro Takahashi","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Shuntaro Takahashi","raw_affiliation_strings":["Graduate School of Engineering, The University of Tokyo, Department of Advanced Interdisciplinary Studies"],"affiliations":[{"raw_affiliation_string":"Graduate School of Engineering, The University of Tokyo, Department of Advanced Interdisciplinary Studies","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5073822077","display_name":"Kumiko Tanaka\u2010Ishii","orcid":"https://orcid.org/0000-0003-1752-3951"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Kumiko Tanaka-Ishii","raw_affiliation_strings":["The University of Tokyo, Research Center for Advanced Science and Technology"],"affiliations":[{"raw_affiliation_string":"The University of Tokyo, Research Center for Advanced Science and Technology","institution_ids":["https://openalex.org/I74801974"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5058381648","https://openalex.org/A5073822077"],"corresponding_institution_ids":["https://openalex.org/I74801974"],"apc_list":null,"apc_paid":null,"fwci":1.4002,"has_fulltext":false,"cited_by_count":24,"citation_normalized_percentile":{"value":0.86162576,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"45","issue":"3","first_page":"481","last_page":"513"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9955000281333923,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12090","display_name":"Language and cultural evolution","score":0.9951000213623047,"subfield":{"id":"https://openalex.org/subfields/3316","display_name":"Cultural Studies"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7758579850196838},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.6023891568183899},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.6008660197257996},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5882378816604614},{"id":"https://openalex.org/keywords/zipfs-law","display_name":"Zipf's law","score":0.5683364868164062},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.47538602352142334},{"id":"https://openalex.org/keywords/recurrent-neural-network","display_name":"Recurrent neural network","score":0.47161227464675903},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.43829044699668884},{"id":"https://openalex.org/keywords/grammar","display_name":"Grammar","score":0.41625961661338806},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.413166344165802},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.4119194447994232},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.4117039144039154},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.346956729888916},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.20675435662269592},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.11534318327903748},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.0888429582118988}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7758579850196838},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.6023891568183899},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.6008660197257996},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5882378816604614},{"id":"https://openalex.org/C125932096","wikidata":"https://www.wikidata.org/wiki/Q205472","display_name":"Zipf's law","level":2,"score":0.5683364868164062},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.47538602352142334},{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.47161227464675903},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.43829044699668884},{"id":"https://openalex.org/C26022165","wikidata":"https://www.wikidata.org/wiki/Q8091","display_name":"Grammar","level":2,"score":0.41625961661338806},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.413166344165802},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.4119194447994232},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.4117039144039154},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.346956729888916},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.20675435662269592},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.11534318327903748},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0888429582118988},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1162/coli_a_00355","is_oa":true,"landing_page_url":"https://doi.org/10.1162/coli_a_00355","pdf_url":"https://direct.mit.edu/coli/article-pdf/45/3/481/1847468/coli_a_00355.pdf","source":{"id":"https://openalex.org/S155526855","display_name":"Computational Linguistics","issn_l":"0891-2017","issn":["0891-2017","1530-9312"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computational Linguistics","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:6bdec7e392704971872d36017ee907dd","is_oa":true,"landing_page_url":"https://doaj.org/article/6bdec7e392704971872d36017ee907dd","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Computational Linguistics, Vol 45, Iss 3, Pp 481-513 (2019)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1162/coli_a_00355","is_oa":true,"landing_page_url":"https://doi.org/10.1162/coli_a_00355","pdf_url":"https://direct.mit.edu/coli/article-pdf/45/3/481/1847468/coli_a_00355.pdf","source":{"id":"https://openalex.org/S155526855","display_name":"Computational Linguistics","issn_l":"0891-2017","issn":["0891-2017","1530-9312"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computational Linguistics","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.7200000286102295,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2950568044.pdf","grobid_xml":"https://content.openalex.org/works/W2950568044.grobid-xml"},"referenced_works_count":64,"referenced_works":["https://openalex.org/W179875071","https://openalex.org/W1509465553","https://openalex.org/W1574901103","https://openalex.org/W1578733506","https://openalex.org/W1593271688","https://openalex.org/W1631260214","https://openalex.org/W1861492603","https://openalex.org/W1934041838","https://openalex.org/W1964274103","https://openalex.org/W1989338016","https://openalex.org/W1999965501","https://openalex.org/W2000042664","https://openalex.org/W2008203686","https://openalex.org/W2017392697","https://openalex.org/W2019588402","https://openalex.org/W2033634113","https://openalex.org/W2036671379","https://openalex.org/W2040940389","https://openalex.org/W2064675550","https://openalex.org/W2093979889","https://openalex.org/W2096602434","https://openalex.org/W2101105183","https://openalex.org/W2131924950","https://openalex.org/W2134237567","https://openalex.org/W2134286110","https://openalex.org/W2142384583","https://openalex.org/W2143017621","https://openalex.org/W2152407964","https://openalex.org/W2154099718","https://openalex.org/W2154652894","https://openalex.org/W2157331557","https://openalex.org/W2158195707","https://openalex.org/W2486932837","https://openalex.org/W2557287083","https://openalex.org/W2571859396","https://openalex.org/W2593383075","https://openalex.org/W2612690371","https://openalex.org/W2616969219","https://openalex.org/W2620623908","https://openalex.org/W2684886669","https://openalex.org/W2736889448","https://openalex.org/W2784823820","https://openalex.org/W2785896739","https://openalex.org/W2792376130","https://openalex.org/W2798025142","https://openalex.org/W2899779804","https://openalex.org/W2952436057","https://openalex.org/W2953320089","https://openalex.org/W2962832505","https://openalex.org/W2963248348","https://openalex.org/W2963262988","https://openalex.org/W2963494889","https://openalex.org/W2963537482","https://openalex.org/W2963748792","https://openalex.org/W2964017345","https://openalex.org/W2964028737","https://openalex.org/W2964268978","https://openalex.org/W3101791345","https://openalex.org/W3102543255","https://openalex.org/W3103362336","https://openalex.org/W3105894932","https://openalex.org/W4205834226","https://openalex.org/W4230526081","https://openalex.org/W4285719527"],"related_works":["https://openalex.org/W3123832408","https://openalex.org/W2137719806","https://openalex.org/W2744330362","https://openalex.org/W2044606595","https://openalex.org/W2096748633","https://openalex.org/W2963431268","https://openalex.org/W2160451571","https://openalex.org/W2495256954","https://openalex.org/W2259317772","https://openalex.org/W2736889448"],"abstract_inverted_index":{"In":[0],"this":[1],"article,":[2],"we":[3,78,154],"evaluate":[4],"computational":[5,75,133],"models":[6,88,108,134],"of":[7,17,47,74,142,159,166],"natural":[8,18,26,143],"language":[9,27,81,87,94,107],"with":[10,114,148],"respect":[11],"to":[12],"the":[13,36,40,44,131,138,157],"universal":[14],"statistical":[15],"behaviors":[16],"language.":[19,144],"Statistical":[20],"mechanical":[21],"analyses":[22],"have":[23],"revealed":[24],"that":[25,106,135,156],"text":[28,101],"is":[29,162],"characterized":[30],"by":[31,57],"scaling":[32,54],"properties,":[33],"which":[34],"quantify":[35],"global":[37],"structure":[38],"in":[39],"vocabulary":[41],"population":[42],"and":[43,66,96,126],"long":[45,119,139],"memory":[46,140],"a":[48,83,115,122,163],"text.":[49],"We":[50],"study":[51],"whether":[52],"five":[53],"properties":[55],"(given":[56],"Zipf\u2019s":[58],"law,":[59,61,65],"Heaps\u2019":[60],"Ebeling\u2019s":[62],"method,":[63],"Taylor\u2019s":[64,160],"long-range":[67],"correlation":[68],"analysis)":[69],"can":[70,136],"serve":[71],"for":[72,100],"evaluation":[73,152],"models.":[76],"Specifically,":[77],"test":[79],"n-gram":[80],"models,":[82,95],"probabilistic":[84],"context-free":[85],"grammar,":[86],"based":[89,109],"on":[90,110],"Simon/Pitman-Yor":[91],"processes,":[92],"neural":[93,112,128],"generative":[97],"adversarial":[98],"networks":[99,113],"generation.":[102],"Our":[103],"analysis":[104],"reveals":[105],"recurrent":[111,124],"gating":[116],"mechanism":[117],"(i.e.,":[118],"short-term":[120],"memory;":[121],"gated":[123],"unit;":[125],"quasi-recurrent":[127],"networks)":[129],"are":[130],"only":[132],"reproduce":[137],"behavior":[141],"Furthermore,":[145],"through":[146],"comparison":[147],"recently":[149],"proposed":[150],"model-based":[151],"methods,":[153],"find":[155],"exponent":[158],"law":[161],"good":[164],"indicator":[165],"model":[167],"quality.":[168]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":5},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
