{"id":"https://openalex.org/W2070611574","doi":"https://doi.org/10.1145/1386869.1386871","title":"Acquisition of Morphology of an Indic Language from Text Corpus","display_name":"Acquisition of Morphology of an Indic Language from Text Corpus","publication_year":2008,"publication_date":"2008-06-01","ids":{"openalex":"https://openalex.org/W2070611574","doi":"https://doi.org/10.1145/1386869.1386871","mag":"2070611574"},"language":"en","primary_location":{"id":"doi:10.1145/1386869.1386871","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1386869.1386871","pdf_url":null,"source":{"id":"https://openalex.org/S56575750","display_name":"ACM Transactions on Asian Language Information Processing","issn_l":"1530-0226","issn":["1530-0226","1558-3430"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Asian Language Information Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5109343788","display_name":"Utpal Sharma","orcid":null},"institutions":[{"id":"https://openalex.org/I126601174","display_name":"Tezpur University","ror":"https://ror.org/005x56091","country_code":"IN","type":"education","lineage":["https://openalex.org/I126601174"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Utpal Sharma","raw_affiliation_strings":["Tezpur University"],"affiliations":[{"raw_affiliation_string":"Tezpur University","institution_ids":["https://openalex.org/I126601174"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Jugal K. Kalita","orcid":null},"institutions":[{"id":"https://openalex.org/I2802236040","display_name":"University of Colorado System","ror":"https://ror.org/00jc20583","country_code":"US","type":"education","lineage":["https://openalex.org/I2802236040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jugal K. Kalita","raw_affiliation_strings":["University of Colorado"],"affiliations":[{"raw_affiliation_string":"University of Colorado","institution_ids":["https://openalex.org/I2802236040"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5022024089","display_name":"Rajib Das","orcid":"https://orcid.org/0000-0002-4962-6448"},"institutions":[{"id":"https://openalex.org/I106542073","display_name":"University of Calcutta","ror":"https://ror.org/01e7v7w47","country_code":"IN","type":"education","lineage":["https://openalex.org/I106542073"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Rajib K. Das","raw_affiliation_strings":["Calcutta University"],"affiliations":[{"raw_affiliation_string":"Calcutta University","institution_ids":["https://openalex.org/I106542073"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5109343788"],"corresponding_institution_ids":["https://openalex.org/I126601174"],"apc_list":null,"apc_paid":null,"fwci":1.7594,"has_fulltext":false,"cited_by_count":19,"citation_normalized_percentile":{"value":0.8764252,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":"7","issue":"3","first_page":"1","last_page":"33"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/assamese","display_name":"Assamese","score":0.9328815340995789},{"id":"https://openalex.org/keywords/suffix","display_name":"Suffix","score":0.8272534608840942},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.806212306022644},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.7105839252471924},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6435732841491699},{"id":"https://openalex.org/keywords/salient","display_name":"Salient","score":0.6028521060943604},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.3987185060977936}],"concepts":[{"id":"https://openalex.org/C2777834912","wikidata":"https://www.wikidata.org/wiki/Q29401","display_name":"Assamese","level":2,"score":0.9328815340995789},{"id":"https://openalex.org/C2779804580","wikidata":"https://www.wikidata.org/wiki/Q102047","display_name":"Suffix","level":2,"score":0.8272534608840942},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.806212306022644},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.7105839252471924},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6435732841491699},{"id":"https://openalex.org/C2780719617","wikidata":"https://www.wikidata.org/wiki/Q1030752","display_name":"Salient","level":2,"score":0.6028521060943604},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.3987185060977936},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/1386869.1386871","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1386869.1386871","pdf_url":null,"source":{"id":"https://openalex.org/S56575750","display_name":"ACM Transactions on Asian Language Information Processing","issn_l":"1530-0226","issn":["1530-0226","1558-3430"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Asian Language Information Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.8299999833106995,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":16,"referenced_works":["https://openalex.org/W53308956","https://openalex.org/W98065071","https://openalex.org/W201532657","https://openalex.org/W615523980","https://openalex.org/W1516146643","https://openalex.org/W1792616884","https://openalex.org/W1970887833","https://openalex.org/W2059842576","https://openalex.org/W2061801881","https://openalex.org/W2098162425","https://openalex.org/W2101711363","https://openalex.org/W2140275137","https://openalex.org/W2150144720","https://openalex.org/W2167299284","https://openalex.org/W2592931181","https://openalex.org/W6766541215"],"related_works":["https://openalex.org/W70126818","https://openalex.org/W2595800753","https://openalex.org/W3136670730","https://openalex.org/W4294250292","https://openalex.org/W2891067899","https://openalex.org/W3111362937","https://openalex.org/W4232434157","https://openalex.org/W2047043392","https://openalex.org/W2016775285","https://openalex.org/W3174543885"],"abstract_inverted_index":{"This":[0,125],"article":[1],"describes":[2],"an":[3,11],"approach":[4],"to":[5,89,128,140,148,177],"unsupervised":[6],"learning":[7],"of":[8,32,77,81,96,116,145,181,202,205],"morphology":[9,73,146],"from":[10,161],"unannotated":[12],"corpus":[13],"for":[14,134,159],"a":[15,78,130,156,184,189],"highly":[16],"inflectional":[17],"Indo-European":[18],"language":[19,50],"called":[20],"Assamese":[21,29,72,160],"spoken":[22,51,103],"by":[23,110],"about":[24],"30":[25],"million":[26],"people.":[27],"Although":[28],"is":[30,59],"one":[31],"Indias":[33],"national":[34],"languages,":[35],"it":[36],"utterly":[37],"lacks":[38],"computational":[39,46],"linguistic":[40],"resources.":[41],"There":[42],"exists":[43],"no":[44],"prior":[45],"work":[47,57],"on":[48,114],"this":[49,62,65],"widely":[52],"in":[53,61,71,100],"northeast":[54],"India.":[55],"The":[56],"presented":[58],"pioneering":[60],"respect.":[63],"In":[64],"article,":[66],"we":[67,154,174],"discuss":[68],"salient":[69],"issues":[70],"where":[74],"the":[75,87,97,142,162,166,171,200],"presence":[76],"large":[79,190],"number":[80],"suffixal":[82],"determiners,":[83],"sandhi,":[84],"samas,":[85],"and":[86,102,112,119,170,197],"propensity":[88],"use":[90],"suffix":[91,136],"sequences":[92],"make":[93],"approximately":[94,194],"50%":[95],"words":[98],"used":[99],"written":[101],"text":[104,163],"inflected.":[105],"We":[106,151,192],"implement":[107],"methods":[108],"proposed":[109],"Gaussier":[111],"Goldsmith":[113],"acquisition":[115,147],"morphological":[117,157,167,172],"knowledge,":[118],"obtain":[120],"F-measure":[121,143],"performance":[122,144],"below":[123],"60%.":[124],"motivates":[126],"us":[127,139],"present":[129],"method":[131],"more":[132],"suitable":[133],"handling":[135],"sequences,":[137],"enabling":[138],"increase":[141],"almost":[149],"70%.":[150],"describe":[152],"how":[153],"build":[155],"dictionary":[158],"corpus.":[164,191],"Using":[165],"knowledge":[168],"acquired":[169],"dictionary,":[173],"are":[175],"able":[176],"process":[178],"small":[179,203],"chunks":[180,204],"data":[182],"at":[183],"time":[185],"as":[186,188],"well":[187],"achieve":[193],"85%":[195],"precision":[196],"recall":[198],"during":[199],"analysis":[201],"coherent":[206],"text.":[207]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2017,"cited_by_count":2},{"year":2016,"cited_by_count":2},{"year":2015,"cited_by_count":1},{"year":2014,"cited_by_count":2},{"year":2013,"cited_by_count":2},{"year":2012,"cited_by_count":4}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
