{"id":"https://openalex.org/W2275469543","doi":"https://doi.org/10.1080/09296174.2015.1071151","title":"Large Scale Quantitative Analysis of three Indo-Aryan Languages","display_name":"Large Scale Quantitative Analysis of three Indo-Aryan Languages","publication_year":2016,"publication_date":"2016-01-02","ids":{"openalex":"https://openalex.org/W2275469543","doi":"https://doi.org/10.1080/09296174.2015.1071151","mag":"2275469543"},"language":"en","primary_location":{"id":"doi:10.1080/09296174.2015.1071151","is_oa":false,"landing_page_url":"https://doi.org/10.1080/09296174.2015.1071151","pdf_url":null,"source":{"id":"https://openalex.org/S24321443","display_name":"Journal of Quantitative Linguistics","issn_l":"0929-6174","issn":["0929-6174","1744-5035"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319847","host_organization_name":"Routledge","host_organization_lineage":["https://openalex.org/P4310319847"],"host_organization_lineage_names":["Routledge"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Quantitative Linguistics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5052157244","display_name":"Parth Mehta","orcid":"https://orcid.org/0000-0002-4509-1298"},"institutions":[{"id":"https://openalex.org/I98389781","display_name":"Dhirubhai Ambani Institute of Information and Communication Technology","ror":"https://ror.org/02d5b7g69","country_code":"IN","type":"education","lineage":["https://openalex.org/I98389781"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Parth Mehta","raw_affiliation_strings":["Dhirubhai Ambani Institute of Information and Communication Technology, Gandhinagar, India"],"affiliations":[{"raw_affiliation_string":"Dhirubhai Ambani Institute of Information and Communication Technology, Gandhinagar, India","institution_ids":["https://openalex.org/I98389781"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5026950398","display_name":"Prasenjit Majumder","orcid":"https://orcid.org/0000-0003-0840-9313"},"institutions":[{"id":"https://openalex.org/I98389781","display_name":"Dhirubhai Ambani Institute of Information and Communication Technology","ror":"https://ror.org/02d5b7g69","country_code":"IN","type":"education","lineage":["https://openalex.org/I98389781"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Prasenjit Majumder","raw_affiliation_strings":["Dhirubhai Ambani Institute of Information and Communication Technology, Gandhinagar, India"],"affiliations":[{"raw_affiliation_string":"Dhirubhai Ambani Institute of Information and Communication Technology, Gandhinagar, India","institution_ids":["https://openalex.org/I98389781"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5052157244"],"corresponding_institution_ids":["https://openalex.org/I98389781"],"apc_list":null,"apc_paid":null,"fwci":1.2854,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.861356,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":"23","issue":"1","first_page":"109","last_page":"132"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.9869999885559082,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9606999754905701,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bengali","display_name":"Bengali","score":0.7286627292633057},{"id":"https://openalex.org/keywords/hindi","display_name":"Hindi","score":0.7278246879577637},{"id":"https://openalex.org/keywords/zipfs-law","display_name":"Zipf's law","score":0.6951186060905457},{"id":"https://openalex.org/keywords/term","display_name":"Term (time)","score":0.6937196254730225},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6123345494270325},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.547073483467102},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5397464036941528},{"id":"https://openalex.org/keywords/population","display_name":"Population","score":0.5063551664352417},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5059694647789001},{"id":"https://openalex.org/keywords/distribution","display_name":"Distribution (mathematics)","score":0.412164568901062},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.34941285848617554},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2803003787994385},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.2690648138523102},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.1798955202102661},{"id":"https://openalex.org/keywords/sociology","display_name":"Sociology","score":0.1310880482196808},{"id":"https://openalex.org/keywords/cartography","display_name":"Cartography","score":0.09922963380813599}],"concepts":[{"id":"https://openalex.org/C19235068","wikidata":"https://www.wikidata.org/wiki/Q9610","display_name":"Bengali","level":2,"score":0.7286627292633057},{"id":"https://openalex.org/C519982507","wikidata":"https://www.wikidata.org/wiki/Q1568","display_name":"Hindi","level":2,"score":0.7278246879577637},{"id":"https://openalex.org/C125932096","wikidata":"https://www.wikidata.org/wiki/Q205472","display_name":"Zipf's law","level":2,"score":0.6951186060905457},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.6937196254730225},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6123345494270325},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.547073483467102},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5397464036941528},{"id":"https://openalex.org/C2908647359","wikidata":"https://www.wikidata.org/wiki/Q2625603","display_name":"Population","level":2,"score":0.5063551664352417},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5059694647789001},{"id":"https://openalex.org/C110121322","wikidata":"https://www.wikidata.org/wiki/Q865811","display_name":"Distribution (mathematics)","level":2,"score":0.412164568901062},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.34941285848617554},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2803003787994385},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.2690648138523102},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.1798955202102661},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.1310880482196808},{"id":"https://openalex.org/C58640448","wikidata":"https://www.wikidata.org/wiki/Q42515","display_name":"Cartography","level":1,"score":0.09922963380813599},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C149923435","wikidata":"https://www.wikidata.org/wiki/Q37732","display_name":"Demography","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1080/09296174.2015.1071151","is_oa":false,"landing_page_url":"https://doi.org/10.1080/09296174.2015.1071151","pdf_url":null,"source":{"id":"https://openalex.org/S24321443","display_name":"Journal of Quantitative Linguistics","issn_l":"0929-6174","issn":["0929-6174","1744-5035"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319847","host_organization_name":"Routledge","host_organization_lineage":["https://openalex.org/P4310319847"],"host_organization_lineage_names":["Routledge"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Quantitative Linguistics","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5299999713897705,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":9,"referenced_works":["https://openalex.org/W1969075471","https://openalex.org/W2013568550","https://openalex.org/W2024245102","https://openalex.org/W2028587098","https://openalex.org/W2050184088","https://openalex.org/W2056898670","https://openalex.org/W2078817043","https://openalex.org/W2251958572","https://openalex.org/W2286975227"],"related_works":["https://openalex.org/W2084490135","https://openalex.org/W2913711322","https://openalex.org/W2402354285","https://openalex.org/W2482488296","https://openalex.org/W2251015319","https://openalex.org/W10843010","https://openalex.org/W2600299847","https://openalex.org/W2072916690","https://openalex.org/W4391800903","https://openalex.org/W2322164278"],"abstract_inverted_index":{"In":[0],"this":[1,130],"paper,":[2],"we":[3],"present":[4],"a":[5,50,142],"thorough":[6],"quantitative":[7],"analysis":[8],"of":[9,14,58,65,69,111,120,127,136],"large":[10,143],"scale":[11],"media":[12,35,70],"text":[13,36,71],"three":[15,138],"Indo-Aryan":[16],"languages,":[17],"viz.":[18],"Hindi,":[19],"Gujarati":[20],"and":[21,33,43,67,115],"Bengali.":[22],"Population":[23],"wise":[24],"they":[25],"together":[26],"amount":[27],"to":[28,53,83,97],"600":[29],"million":[30],"speakers.":[31],"Understanding":[32],"processing":[34],"is":[37,89,131],"very":[38],"important":[39],"from":[40],"sociological,":[41],"cultural":[42],"information":[44],"science/theoretic":[45],"stand":[46],"points.":[47],"We":[48,75,95],"did":[49],"detailed":[51],"study":[52,62,135],"understand":[54],"the":[55,87,99,112,118,121,125,132],"statistical":[56],"nature":[57],"these":[59,137],"data.":[60],"The":[61],"demonstrates":[63],"effect":[64,108],"size":[66],"category":[68],"on":[72,109,117,140],"term":[73,102,113,122],"distributions.":[74],"establish":[76],"that":[77,116],"while":[78],"higher":[79],"order":[80],"n-grams":[81],"tend":[82],"follow":[84],"Zipf\u2019s":[85],"law,":[86],"same":[88],"not":[90],"always":[91],"true":[92],"for":[93],"unigrams.":[94],"attempt":[96],"model":[98],"change":[100],"in":[101,104],"distribution":[103,114],"two":[105],"separate":[106],"parts:":[107],"steepness":[110],"tail":[119],"distribution.":[123],"To":[124],"best":[126],"our":[128],"knowledge":[129],"first":[133],"exploratory":[134],"languages":[139],"such":[141],"scale.":[144]},"counts_by_year":[{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":2},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2017,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
