{"id":"https://openalex.org/W3194993673","doi":"https://doi.org/10.1145/3458250","title":"Linguistic Resources for Bhojpuri, Magahi, and Maithili: Statistics about Them, Their Similarity Estimates, and Baselines for Three Applications","display_name":"Linguistic Resources for Bhojpuri, Magahi, and Maithili: Statistics about Them, Their Similarity Estimates, and Baselines for Three Applications","publication_year":2021,"publication_date":"2021-09-13","ids":{"openalex":"https://openalex.org/W3194993673","doi":"https://doi.org/10.1145/3458250","mag":"3194993673"},"language":"en","primary_location":{"id":"doi:10.1145/3458250","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3458250","pdf_url":null,"source":{"id":"https://openalex.org/S4306421405","display_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","issn_l":"2375-4699","issn":["2375-4699","2375-4702"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5003314662","display_name":"Rajesh Kumar Mundotiya","orcid":"https://orcid.org/0000-0002-0096-2440"},"institutions":[{"id":"https://openalex.org/I56404289","display_name":"Indian Institute of Technology BHU","ror":"https://ror.org/01kh5gc44","country_code":"IN","type":"education","lineage":["https://openalex.org/I56404289"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Rajesh Kumar Mundotiya","raw_affiliation_strings":["Indian Institute of Technology (BHU), U.P., India"],"affiliations":[{"raw_affiliation_string":"Indian Institute of Technology (BHU), U.P., India","institution_ids":["https://openalex.org/I56404289"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103792723","display_name":"Manish Kumar Singh","orcid":"https://orcid.org/0009-0005-0649-9310"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Manish Kumar Singh","raw_affiliation_strings":["Alexa AI, Amazon, Bangalore, Karnataka, India"],"affiliations":[{"raw_affiliation_string":"Alexa AI, Amazon, Bangalore, Karnataka, India","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073001781","display_name":"Rahul Kapur","orcid":null},"institutions":[{"id":"https://openalex.org/I4210139030","display_name":"Samsung (India)","ror":"https://ror.org/04cpx2569","country_code":"IN","type":"company","lineage":["https://openalex.org/I2250650973","https://openalex.org/I4210139030"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Rahul Kapur","raw_affiliation_strings":["Samsung Research Institute, Noida, U.P., India"],"affiliations":[{"raw_affiliation_string":"Samsung Research Institute, Noida, U.P., India","institution_ids":["https://openalex.org/I4210139030"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068112898","display_name":"Swasti Mishra","orcid":"https://orcid.org/0000-0003-3854-7637"},"institutions":[{"id":"https://openalex.org/I56404289","display_name":"Indian Institute of Technology BHU","ror":"https://ror.org/01kh5gc44","country_code":"IN","type":"education","lineage":["https://openalex.org/I56404289"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Swasti Mishra","raw_affiliation_strings":["Indian Institute of Technology (BHU), India"],"affiliations":[{"raw_affiliation_string":"Indian Institute of Technology (BHU), India","institution_ids":["https://openalex.org/I56404289"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5015174369","display_name":"Anil Kumar Singh","orcid":"https://orcid.org/0000-0001-7177-5901"},"institutions":[{"id":"https://openalex.org/I56404289","display_name":"Indian Institute of Technology BHU","ror":"https://ror.org/01kh5gc44","country_code":"IN","type":"education","lineage":["https://openalex.org/I56404289"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Anil Kumar Singh","raw_affiliation_strings":["Indian Institute of Technology (BHU), U.P., India"],"affiliations":[{"raw_affiliation_string":"Indian Institute of Technology (BHU), U.P., India","institution_ids":["https://openalex.org/I56404289"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5003314662"],"corresponding_institution_ids":["https://openalex.org/I56404289"],"apc_list":null,"apc_paid":null,"fwci":1.0877,"has_fulltext":false,"cited_by_count":12,"citation_normalized_percentile":{"value":0.81871497,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"20","issue":"6","first_page":"1","last_page":"37"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/hindi","display_name":"Hindi","score":0.7796931266784668},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7588330507278442},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6783044338226318},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6008366942405701},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.501262903213501},{"id":"https://openalex.org/keywords/syllable","display_name":"Syllable","score":0.47175130248069763},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4158293604850769},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.1829855740070343}],"concepts":[{"id":"https://openalex.org/C519982507","wikidata":"https://www.wikidata.org/wiki/Q1568","display_name":"Hindi","level":2,"score":0.7796931266784668},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7588330507278442},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6783044338226318},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6008366942405701},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.501262903213501},{"id":"https://openalex.org/C109089402","wikidata":"https://www.wikidata.org/wiki/Q8188","display_name":"Syllable","level":2,"score":0.47175130248069763},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4158293604850769},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.1829855740070343},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3458250","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3458250","pdf_url":null,"source":{"id":"https://openalex.org/S4306421405","display_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","issn_l":"2375-4699","issn":["2375-4699","2375-4702"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6000000238418579,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":55,"referenced_works":["https://openalex.org/W1524281572","https://openalex.org/W1573409773","https://openalex.org/W1598996557","https://openalex.org/W1650670709","https://openalex.org/W1990098277","https://openalex.org/W2002391215","https://openalex.org/W2007035566","https://openalex.org/W2014552602","https://openalex.org/W2041197998","https://openalex.org/W2048176942","https://openalex.org/W2053154970","https://openalex.org/W2054145113","https://openalex.org/W2076977565","https://openalex.org/W2081748579","https://openalex.org/W2102381086","https://openalex.org/W2104630935","https://openalex.org/W2123138964","https://openalex.org/W2148803121","https://openalex.org/W2156515921","https://openalex.org/W2159544173","https://openalex.org/W2250618788","https://openalex.org/W2250662591","https://openalex.org/W2251351090","https://openalex.org/W2251530528","https://openalex.org/W2285714660","https://openalex.org/W2317595117","https://openalex.org/W2483327705","https://openalex.org/W2489230015","https://openalex.org/W2539011526","https://openalex.org/W2540747174","https://openalex.org/W2561675875","https://openalex.org/W2608486090","https://openalex.org/W2613895792","https://openalex.org/W2618812092","https://openalex.org/W2620949368","https://openalex.org/W2739911518","https://openalex.org/W2741692265","https://openalex.org/W2769280657","https://openalex.org/W2790740913","https://openalex.org/W2798838126","https://openalex.org/W2912145841","https://openalex.org/W2919018448","https://openalex.org/W2942583857","https://openalex.org/W2962702662","https://openalex.org/W2963002901","https://openalex.org/W2963088995","https://openalex.org/W2963706833","https://openalex.org/W2964842416","https://openalex.org/W2972769038","https://openalex.org/W2994651209","https://openalex.org/W3104723404","https://openalex.org/W3114684923","https://openalex.org/W4238974971","https://openalex.org/W4391418904","https://openalex.org/W6723165035"],"related_works":["https://openalex.org/W2384553807","https://openalex.org/W1557888283","https://openalex.org/W3089999372","https://openalex.org/W2265245145","https://openalex.org/W3110423299","https://openalex.org/W4312229285","https://openalex.org/W2738278463","https://openalex.org/W4308939443","https://openalex.org/W4299528489","https://openalex.org/W2243342922"],"abstract_inverted_index":{"Corpus":[0],"preparation":[1],"for":[2,6,94,128,271,278,285,293,305,315,365,371,412,423],"low-resource":[3,62],"languages":[4,36,50,63,97,198,241,336],"and":[5,37,43,48,101,120,135,146,156,158,170,242,248,267,274,282,287,302,325,339,380,385,420],"development":[7],"of":[8,27,34,51,55,186,203,356,362,396],"human":[9],"language":[10,368,397,403,421],"technology":[11],"to":[12,24,40,65,75,104,117,161,191,199,235,255,337],"analyze":[13],"or":[14],"computationally":[15],"process":[16],"them":[17,88,103,379],"is":[18,78,84,359,409],"a":[19,79,180,340,392],"laborious":[20],"task,":[21],"primarily":[22],"due":[23,39],"the":[25,41,52,58,66,105,110,187,193,197,201,215,227,240,243,250,258,306,332,348,357,360],"unavailability":[26],"expert":[28],"linguists":[29],"who":[30],"are":[31,61,72,229,264,280],"native":[32],"speakers":[33],"these":[35,95,129,294,372,424],"also":[38,141,330],"time":[42],"resources":[44,364],"required.":[45],"Bhojpuri,":[46,272],"Magahi,":[47,273],"Maithili,":[49,288],"Purvanchal":[53],"region":[54],"India":[56],"(in":[57],"north-eastern":[59],"parts),":[60],"belonging":[64],"Indo-Aryan":[67],"(or":[68,173],"Indic)":[69],"family.":[70],"They":[71],"closely":[73,425],"related":[74,426],"Hindi,":[76],"which":[77,83,320],"relatively":[80],"high-resource":[81],"language,":[82],"why":[85],"we":[86,189,233,390],"compare":[87],"with":[89,143,179,386],"Hindi.":[90,387],"We":[91,122,328],"collected":[92],"corpora":[93,130,139,311],"three":[96,307,413],"from":[98],"various":[99],"sources":[100],"cleaned":[102],"extent":[106],"possible,":[107],"without":[108],"changing":[109],"data":[111,262],"in":[112,207,347],"them.":[113],"The":[114,149,175,260,276,290,353],"text":[115],"belongs":[116],"different":[118],"domains":[119],"genres.":[121],"calculated":[123],"some":[124,208,237,375],"basic":[125,150,363,414],"statistical":[126,151],"measures":[127,152,377],"at":[131],"character,":[132],"word,":[133],"syllable,":[134],"morpheme":[136],"levels.":[137],"These":[138,309],"were":[140,153,159,177,223],"annotated":[142],"parts-of-speech":[144],"(POS)":[145],"chunk":[147],"tags.":[148],"both":[154],"absolute":[155],"relative":[157],"expected":[160],"indicate":[162],"linguistic":[163],"properties,":[164],"such":[165],"as":[166,350],"morphological,":[167],"lexical,":[168],"phonological,":[169],"syntactic":[171],"complexities":[172],"richness).":[174],"results":[176,228],"compared":[178],"standard":[181],"Hindi":[182],"corpus.":[183],"For":[184,245,388],"most":[185],"measures,":[188],"tried":[190,234],"match":[192],"corpus":[194,204,217],"size":[195],"across":[196],"avoid":[200],"effect":[202],"size,":[205],"but":[206],"cases":[209],"it":[210],"turned":[211],"out":[212],"that":[213],"using":[214,296],"full":[216],"was":[218,253,299],"better,":[219],"even":[220],"if":[221],"sizes":[222,263,277],"very":[224,231],"different.":[225],"Although":[226],"not":[230],"clear,":[232],"draw":[236],"conclusions":[238],"about":[239,378],"corpora.":[244],"POS":[246,322],"tagging":[247],"chunking,":[249,419],"BIS":[251],"tagset":[252],"used":[254,314],"manually":[256],"annotate":[257],"data.":[259],"POS-tagged":[261],"16,067,":[265],"14,669,":[266],"12,310":[268],"sentences,":[269],"respectively,":[270,304],"Maithili.":[275],"chunking":[279],"9,695":[281],"1,954":[283],"sentences":[284],"Bhojpuri":[286],"respectively.":[289],"inter-annotator":[291],"agreement":[292],"annotations,":[295],"Cohen\u2019s":[297],"Kappa,":[298],"0.92,":[300],"0.64,":[301],"0.74,":[303],"languages.":[308,427],"(annotated)":[310],"have":[312,329],"been":[313],"developing":[316],"preliminary":[317],"automated":[318],"tools,":[319],"include":[321],"tagger,":[323],"Chunker,":[324],"Language":[326],"Identifier.":[327],"developed":[331],"Bilingual":[333],"dictionary":[334],"(Purvanchal":[335],"Hindi)":[338],"Synset":[341],"(that":[342],"can":[343],"be":[344],"integrated":[345],"later":[346],"Indo-WordNet)":[349],"additional":[351,407],"resources.":[352],"main":[354],"contribution":[355,408],"work":[358],"creation":[361],"facilitating":[366],"further":[367],"processing":[369],"research":[370],"languages,":[373],"providing":[374,410],"quantitative":[376],"their":[381],"similarities":[382],"among":[383],"themselves":[384],"similarities,":[389],"use":[391],"somewhat":[393],"novel":[394],"measure":[395],"similarity":[398],"based":[399],"on":[400],"an":[401],"n-gram-based":[402],"identification":[404],"algorithm.":[405],"An":[406],"baselines":[411],"NLP":[415],"applications":[416],"(POS":[417],"tagging,":[418],"identification)":[422]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
