{"id":"https://openalex.org/W3006817308","doi":"https://doi.org/10.1109/bigdata47090.2019.9005957","title":"Parsimonious Morpheme Segmentation with an Application to Enriching Word Embeddings","display_name":"Parsimonious Morpheme Segmentation with an Application to Enriching Word Embeddings","publication_year":2019,"publication_date":"2019-12-01","ids":{"openalex":"https://openalex.org/W3006817308","doi":"https://doi.org/10.1109/bigdata47090.2019.9005957","mag":"3006817308"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata47090.2019.9005957","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata47090.2019.9005957","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5035119055","display_name":"Ahmed El-Kishky","orcid":"https://orcid.org/0000-0003-0121-7781"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Ahmed El-Kishky","raw_affiliation_strings":["The University of Illinois at Urbana-Champaign"],"affiliations":[{"raw_affiliation_string":"The University of Illinois at Urbana-Champaign","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038743835","display_name":"Frank F. Xu","orcid":"https://orcid.org/0000-0002-9662-7582"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Frank Xu","raw_affiliation_strings":["Carnegie Mellon University"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049841140","display_name":"Aston Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Aston Zhang","raw_affiliation_strings":["Amazon AI"],"affiliations":[{"raw_affiliation_string":"Amazon AI","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5019539533","display_name":"Jiawei Han","orcid":"https://orcid.org/0000-0002-3629-2696"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jiawei Han","raw_affiliation_strings":["The University of Illinois at Urbana-Champaign"],"affiliations":[{"raw_affiliation_string":"The University of Illinois at Urbana-Champaign","institution_ids":["https://openalex.org/I157725225"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5035119055"],"corresponding_institution_ids":["https://openalex.org/I157725225"],"apc_list":null,"apc_paid":null,"fwci":0.28,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.67847425,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"64","last_page":"73"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/morpheme","display_name":"Morpheme","score":0.9024174213409424},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7894537448883057},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.7728772163391113},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7063910365104675},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.5978575944900513},{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.5887607336044312},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.559445858001709},{"id":"https://openalex.org/keywords/word-embedding","display_name":"Word embedding","score":0.4994175434112549},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.48233506083488464},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.4806210398674011},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.46306294202804565},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.42062029242515564},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.21756795048713684}],"concepts":[{"id":"https://openalex.org/C165297611","wikidata":"https://www.wikidata.org/wiki/Q43249","display_name":"Morpheme","level":2,"score":0.9024174213409424},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7894537448883057},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.7728772163391113},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7063910365104675},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.5978575944900513},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.5887607336044312},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.559445858001709},{"id":"https://openalex.org/C2777462759","wikidata":"https://www.wikidata.org/wiki/Q18395344","display_name":"Word embedding","level":3,"score":0.4994175434112549},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.48233506083488464},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.4806210398674011},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.46306294202804565},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.42062029242515564},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.21756795048713684},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata47090.2019.9005957","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata47090.2019.9005957","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6399999856948853,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":65,"referenced_works":["https://openalex.org/W196214544","https://openalex.org/W1498436455","https://openalex.org/W1515532631","https://openalex.org/W1614298861","https://openalex.org/W1616871572","https://openalex.org/W1854884267","https://openalex.org/W1938755728","https://openalex.org/W1940872118","https://openalex.org/W1957775505","https://openalex.org/W1983311927","https://openalex.org/W2006672970","https://openalex.org/W2034915529","https://openalex.org/W2039660096","https://openalex.org/W2040711288","https://openalex.org/W2041404167","https://openalex.org/W2042232391","https://openalex.org/W2053921957","https://openalex.org/W2070554026","https://openalex.org/W2110485445","https://openalex.org/W2115867364","https://openalex.org/W2117130368","https://openalex.org/W2117621558","https://openalex.org/W2118090838","https://openalex.org/W2121879602","https://openalex.org/W2141599568","https://openalex.org/W2142377809","https://openalex.org/W2153579005","https://openalex.org/W2155033295","https://openalex.org/W2169147927","https://openalex.org/W2176796957","https://openalex.org/W2215542482","https://openalex.org/W2250879510","https://openalex.org/W2251012068","https://openalex.org/W2251176673","https://openalex.org/W2493916176","https://openalex.org/W2525778437","https://openalex.org/W2807217870","https://openalex.org/W2807354286","https://openalex.org/W2851783924","https://openalex.org/W2950577311","https://openalex.org/W2951559648","https://openalex.org/W2962784628","https://openalex.org/W2963419157","https://openalex.org/W2963626623","https://openalex.org/W2963979492","https://openalex.org/W2964005834","https://openalex.org/W2978725006","https://openalex.org/W3216404684","https://openalex.org/W4205924575","https://openalex.org/W4247704254","https://openalex.org/W4254816979","https://openalex.org/W4294170691","https://openalex.org/W6607974698","https://openalex.org/W6629815555","https://openalex.org/W6636510571","https://openalex.org/W6640362995","https://openalex.org/W6640598943","https://openalex.org/W6677518113","https://openalex.org/W6680890276","https://openalex.org/W6681039121","https://openalex.org/W6682691769","https://openalex.org/W6685643745","https://openalex.org/W6691746754","https://openalex.org/W6752547996","https://openalex.org/W6752782857"],"related_works":["https://openalex.org/W4289013130","https://openalex.org/W4241414757","https://openalex.org/W4283366759","https://openalex.org/W2383186719","https://openalex.org/W4206127412","https://openalex.org/W3103292258","https://openalex.org/W2303278641","https://openalex.org/W2356355377","https://openalex.org/W2350316598","https://openalex.org/W4286432911"],"abstract_inverted_index":{"Traditionally,":[0],"many":[1,15],"text-mining":[2],"tasks":[3],"treat":[4],"individual":[5],"word-tokens":[6],"as":[7],"the":[8,33,89,98],"finest":[9],"meaningful":[10,26],"semantic":[11,34],"granularity.":[12],"However,":[13],"in":[14,37,57,117],"languages":[16,121],"and":[17,148],"specialized":[18],"corpora,":[19],"words":[20,56,87,116],"are":[21],"composed":[22],"by":[23],"concatenating":[24],"semantically":[25],"subword":[27,39],"structures.":[28,40],"Word-level":[29],"analysis":[30],"cannot":[31],"leverage":[32],"information":[35],"present":[36],"such":[38],"With":[41],"regard":[42],"to":[43,49,84,102,133],"word":[44,135],"embedding":[45,139,146],"techniques,":[46],"this":[47,70],"leads":[48,101],"not":[50],"only":[51],"poor":[52],"embeddings":[53,136],"for":[54,65,75],"infrequent":[55],"long-tailed":[58],"text":[59],"corpora":[60],"but":[61],"also":[62],"weak":[63],"capabilities":[64],"handling":[66],"out-of-vocabulary":[67],"words.":[68],"In":[69],"paper":[71],"we":[72,126],"propose":[73],"MorphMine":[74,79,114,131],"unsupervised":[76],"morpheme":[77],"segmentation.":[78,110],"applies":[80],"a":[81,118,142,149],"parsimony":[82],"criterion":[83],"hierarchically":[85],"segment":[86],"into":[88,122],"fewest":[90],"number":[91],"of":[92,97,109,120,144,145],"morphemes":[93,105,132],"at":[94,106],"each":[95,107],"level":[96,108],"hierarchy.":[99],"This":[100],"longer":[103],"shared":[104],"Experiments":[111],"show":[112],"that":[113,129],"segments":[115],"variety":[119,143],"human-verified":[123],"morphemes.":[124],"Additionally,":[125],"experimentally":[127],"demonstrate":[128],"utilizing":[130],"enrich":[134],"consistently":[137],"improves":[138],"quality":[140],"on":[141],"evaluations":[147],"downstream":[150],"language":[151],"modeling":[152],"task.":[153]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
