{"id":"https://openalex.org/W2165921245","doi":"https://doi.org/10.3115/v1/p14-1127","title":"Unsupervised Morphology-Based Vocabulary Expansion","display_name":"Unsupervised Morphology-Based Vocabulary Expansion","publication_year":2014,"publication_date":"2014-01-01","ids":{"openalex":"https://openalex.org/W2165921245","doi":"https://doi.org/10.3115/v1/p14-1127","mag":"2165921245"},"language":"en","primary_location":{"id":"doi:10.3115/v1/p14-1127","is_oa":true,"landing_page_url":"https://doi.org/10.3115/v1/p14-1127","pdf_url":"https://aclanthology.org/P14-1127.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 52nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/P14-1127.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5087104756","display_name":"Mohammad Sadegh Rasooli","orcid":null},"institutions":[{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Mohammad Sadegh Rasooli","raw_affiliation_strings":["Columbia University"],"affiliations":[{"raw_affiliation_string":"Columbia University","institution_ids":["https://openalex.org/I78577930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069311266","display_name":"Thomas Lippincott","orcid":null},"institutions":[{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Thomas Lippincott","raw_affiliation_strings":["Columbia University"],"affiliations":[{"raw_affiliation_string":"Columbia University","institution_ids":["https://openalex.org/I78577930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084517393","display_name":"Nizar Habash","orcid":"https://orcid.org/0000-0002-1831-3457"},"institutions":[{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nizar Habash","raw_affiliation_strings":["Columbia University"],"affiliations":[{"raw_affiliation_string":"Columbia University","institution_ids":["https://openalex.org/I78577930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5021314411","display_name":"Owen Rambow","orcid":null},"institutions":[{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Owen Rambow","raw_affiliation_strings":["Columbia University"],"affiliations":[{"raw_affiliation_string":"Columbia University","institution_ids":["https://openalex.org/I78577930"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5087104756"],"corresponding_institution_ids":["https://openalex.org/I78577930"],"apc_list":null,"apc_paid":null,"fwci":4.9081,"has_fulltext":false,"cited_by_count":20,"citation_normalized_percentile":{"value":0.9541912,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1349","last_page":"1359"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9944999814033508,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8290739059448242},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.7853634357452393},{"id":"https://openalex.org/keywords/assamese","display_name":"Assamese","score":0.7591938972473145},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6447248458862305},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6290959119796753},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5754848122596741},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.5529440641403198},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.5467126369476318},{"id":"https://openalex.org/keywords/character","display_name":"Character (mathematics)","score":0.5455760955810547},{"id":"https://openalex.org/keywords/optical-character-recognition","display_name":"Optical character recognition","score":0.49499839544296265},{"id":"https://openalex.org/keywords/test","display_name":"Test (biology)","score":0.49004065990448},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.4737055003643036},{"id":"https://openalex.org/keywords/test-set","display_name":"Test set","score":0.4627390503883362},{"id":"https://openalex.org/keywords/generator","display_name":"Generator (circuit theory)","score":0.45540720224380493},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.382354736328125},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.22615697979927063},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.08573707938194275},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.077360600233078}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8290739059448242},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.7853634357452393},{"id":"https://openalex.org/C2777834912","wikidata":"https://www.wikidata.org/wiki/Q29401","display_name":"Assamese","level":2,"score":0.7591938972473145},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6447248458862305},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6290959119796753},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5754848122596741},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.5529440641403198},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.5467126369476318},{"id":"https://openalex.org/C2780861071","wikidata":"https://www.wikidata.org/wiki/Q1062934","display_name":"Character (mathematics)","level":2,"score":0.5455760955810547},{"id":"https://openalex.org/C546480517","wikidata":"https://www.wikidata.org/wiki/Q167555","display_name":"Optical character recognition","level":3,"score":0.49499839544296265},{"id":"https://openalex.org/C2777267654","wikidata":"https://www.wikidata.org/wiki/Q3519023","display_name":"Test (biology)","level":2,"score":0.49004065990448},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.4737055003643036},{"id":"https://openalex.org/C169903167","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Test set","level":2,"score":0.4627390503883362},{"id":"https://openalex.org/C2780992000","wikidata":"https://www.wikidata.org/wiki/Q17016113","display_name":"Generator (circuit theory)","level":3,"score":0.45540720224380493},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.382354736328125},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.22615697979927063},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.08573707938194275},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.077360600233078},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.3115/v1/p14-1127","is_oa":true,"landing_page_url":"https://doi.org/10.3115/v1/p14-1127","pdf_url":"https://aclanthology.org/P14-1127.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 52nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.3115/v1/p14-1127","is_oa":true,"landing_page_url":"https://doi.org/10.3115/v1/p14-1127","pdf_url":"https://aclanthology.org/P14-1127.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 52nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.7900000214576721,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2165921245.pdf","grobid_xml":"https://content.openalex.org/works/W2165921245.grobid-xml"},"referenced_works_count":29,"referenced_works":["https://openalex.org/W4629839","https://openalex.org/W64527363","https://openalex.org/W1487714050","https://openalex.org/W1543512527","https://openalex.org/W1604228412","https://openalex.org/W1616871572","https://openalex.org/W1642393990","https://openalex.org/W1650656906","https://openalex.org/W1687743765","https://openalex.org/W1902030528","https://openalex.org/W1994359934","https://openalex.org/W2036421848","https://openalex.org/W2047603832","https://openalex.org/W2053306448","https://openalex.org/W2063721546","https://openalex.org/W2074999873","https://openalex.org/W2101295087","https://openalex.org/W2103170111","https://openalex.org/W2108622839","https://openalex.org/W2116211107","https://openalex.org/W2121173708","https://openalex.org/W2122891480","https://openalex.org/W2130935327","https://openalex.org/W2151409774","https://openalex.org/W2153903004","https://openalex.org/W2157963512","https://openalex.org/W2168576900","https://openalex.org/W2250323038","https://openalex.org/W2250648475"],"related_works":["https://openalex.org/W70126818","https://openalex.org/W2951796583","https://openalex.org/W2795118711","https://openalex.org/W3136670730","https://openalex.org/W2595800753","https://openalex.org/W4225398085","https://openalex.org/W4294250292","https://openalex.org/W2891067899","https://openalex.org/W3002261956","https://openalex.org/W4230848040"],"abstract_inverted_index":{"We":[0,27],"present":[1],"a":[2,45,88],"novel":[3],"way":[4],"of":[5,69,83,91],"generating":[6],"unseen":[7],"words,":[8],"which":[9],"is":[10],"useful":[11],"for":[12],"certain":[13],"applications":[14],"such":[15],"as":[16],"automatic":[17],"speech":[18],"recognition":[19,23],"or":[20],"optical":[21],"character":[22],"in":[24,40],"low-resource":[25,34],"languages.":[26],"test":[28,47],"our":[29,61,73,78],"vocabulary":[30],"generator":[31],"on":[32,44,65],"seven":[33],"languages":[35,50],"by":[36],"measuring":[37],"the":[38,66,70,84],"decrease":[39],"out-of-vocabulary":[41,86],"word":[42],"rate":[43],"held-out":[46],"set.":[48],"The":[49],"we":[51,58],"study":[52],"have":[53],"very":[54],"different":[55],"morphological":[56,67],"properties;":[57],"show":[59],"how":[60],"results":[62],"differ":[63],"depending":[64],"complexity":[68],"language.":[71],"In":[72],"best":[74],"result":[75],"(on":[76],"Assamese),":[77],"approach":[79],"can":[80],"predict":[81],"29%":[82],"token-based":[85],"with":[87],"small":[89],"amount":[90],"unlabeled":[92],"training":[93],"data.":[94]},"counts_by_year":[{"year":2019,"cited_by_count":6},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":2},{"year":2016,"cited_by_count":6},{"year":2015,"cited_by_count":1},{"year":2014,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
