{"id":"https://openalex.org/W3211404322","doi":"https://doi.org/10.1109/access.2021.3128439","title":"Morpheme Embedding for Bahasa Indonesia Using Modified Byte Pair Encoding","display_name":"Morpheme Embedding for Bahasa Indonesia Using Modified Byte Pair Encoding","publication_year":2021,"publication_date":"2021-01-01","ids":{"openalex":"https://openalex.org/W3211404322","doi":"https://doi.org/10.1109/access.2021.3128439","mag":"3211404322"},"language":"en","primary_location":{"id":"doi:10.1109/access.2021.3128439","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2021.3128439","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1109/access.2021.3128439","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5061307708","display_name":"Amalia Amalia","orcid":"https://orcid.org/0000-0003-0595-8296"},"institutions":[{"id":"https://openalex.org/I254887060","display_name":"Universitas Sumatera Utara","ror":"https://ror.org/01kknrc90","country_code":"ID","type":"education","lineage":["https://openalex.org/I254887060"]}],"countries":["ID"],"is_corresponding":false,"raw_author_name":"Amalia Amalia","raw_affiliation_strings":["Department of Computer Science, Universitas Sumatera Utara, Medan 20222, Indonesia. (e-mail: amalia@usu.ac.id)"],"raw_orcid":"https://orcid.org/0000-0003-0595-8296","affiliations":[{"raw_affiliation_string":"Department of Computer Science, Universitas Sumatera Utara, Medan 20222, Indonesia. (e-mail: amalia@usu.ac.id)","institution_ids":["https://openalex.org/I254887060"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013645071","display_name":"Opim Salim Sitompul","orcid":"https://orcid.org/0000-0001-6069-1841"},"institutions":[{"id":"https://openalex.org/I254887060","display_name":"Universitas Sumatera Utara","ror":"https://ror.org/01kknrc90","country_code":"ID","type":"education","lineage":["https://openalex.org/I254887060"]}],"countries":["ID"],"is_corresponding":false,"raw_author_name":"Opim Salim Sitompul","raw_affiliation_strings":["Department of Information Technology, Universitas Sumatera Utara, Medan 20222, Indonesia"],"raw_orcid":"https://orcid.org/0000-0001-6069-1841","affiliations":[{"raw_affiliation_string":"Department of Information Technology, Universitas Sumatera Utara, Medan 20222, Indonesia","institution_ids":["https://openalex.org/I254887060"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055331226","display_name":"Teddy Mantoro","orcid":"https://orcid.org/0000-0003-2534-809X"},"institutions":[{"id":"https://openalex.org/I4210165309","display_name":"Sampoerna University","ror":"https://ror.org/05t91sk49","country_code":"ID","type":"education","lineage":["https://openalex.org/I4210165309"]}],"countries":["ID"],"is_corresponding":false,"raw_author_name":"Teddy Mantoro","raw_affiliation_strings":["Department of Computer Science, Sampoerna University, Jakarta 12780, Indonesia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Sampoerna University, Jakarta 12780, Indonesia","institution_ids":["https://openalex.org/I4210165309"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5034228625","display_name":"Erna Budhiarti Nababan","orcid":"https://orcid.org/0000-0002-6368-5997"},"institutions":[{"id":"https://openalex.org/I254887060","display_name":"Universitas Sumatera Utara","ror":"https://ror.org/01kknrc90","country_code":"ID","type":"education","lineage":["https://openalex.org/I254887060"]}],"countries":["ID"],"is_corresponding":false,"raw_author_name":"Erna Budhiarti Nababan","raw_affiliation_strings":["Department of Information Technology, Universitas Sumatera Utara, Medan 20222, Indonesia"],"raw_orcid":"https://orcid.org/0000-0002-6368-5997","affiliations":[{"raw_affiliation_string":"Department of Information Technology, Universitas Sumatera Utara, Medan 20222, Indonesia","institution_ids":["https://openalex.org/I254887060"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":1.1195,"has_fulltext":false,"cited_by_count":15,"citation_normalized_percentile":{"value":0.82748191,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"9","issue":null,"first_page":"155699","last_page":"155710"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13559","display_name":"Edcuational Technology Systems","score":0.988099992275238,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/morpheme","display_name":"Morpheme","score":0.8659608364105225},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.751591682434082},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6760687828063965},{"id":"https://openalex.org/keywords/prefix","display_name":"Prefix","score":0.6208059191703796},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5920406579971313},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.5890246629714966},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.5603736639022827},{"id":"https://openalex.org/keywords/word-embedding","display_name":"Word embedding","score":0.5155110359191895},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.35137104988098145},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3308802843093872}],"concepts":[{"id":"https://openalex.org/C165297611","wikidata":"https://www.wikidata.org/wiki/Q43249","display_name":"Morpheme","level":2,"score":0.8659608364105225},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.751591682434082},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6760687828063965},{"id":"https://openalex.org/C141603448","wikidata":"https://www.wikidata.org/wiki/Q134830","display_name":"Prefix","level":2,"score":0.6208059191703796},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5920406579971313},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.5890246629714966},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.5603736639022827},{"id":"https://openalex.org/C2777462759","wikidata":"https://www.wikidata.org/wiki/Q18395344","display_name":"Word embedding","level":3,"score":0.5155110359191895},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.35137104988098145},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3308802843093872},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/access.2021.3128439","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2021.3128439","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:eb8bfc9bfc5644308d925a818fa0dfc3","is_oa":true,"landing_page_url":"https://doaj.org/article/eb8bfc9bfc5644308d925a818fa0dfc3","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Access, Vol 9, Pp 155699-155710 (2021)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1109/access.2021.3128439","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2021.3128439","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.7099999785423279,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320327947","display_name":"Universitas Sumatera Utara","ror":"https://ror.org/01kknrc90"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":51,"referenced_works":["https://openalex.org/W46679369","https://openalex.org/W1614298861","https://openalex.org/W2053306448","https://openalex.org/W2071709160","https://openalex.org/W2138857742","https://openalex.org/W2163922914","https://openalex.org/W2250539671","https://openalex.org/W2251012068","https://openalex.org/W2251565024","https://openalex.org/W2251830157","https://openalex.org/W2251874715","https://openalex.org/W2460442863","https://openalex.org/W2493916176","https://openalex.org/W2507731975","https://openalex.org/W2529840421","https://openalex.org/W2573169100","https://openalex.org/W2606342375","https://openalex.org/W2743935014","https://openalex.org/W2762022354","https://openalex.org/W2785317943","https://openalex.org/W2792073421","https://openalex.org/W2798955519","https://openalex.org/W2886527127","https://openalex.org/W2901215838","https://openalex.org/W2941241466","https://openalex.org/W2962784628","https://openalex.org/W2963250244","https://openalex.org/W2963711067","https://openalex.org/W2963979492","https://openalex.org/W2964005834","https://openalex.org/W2964047576","https://openalex.org/W2964053711","https://openalex.org/W3035207248","https://openalex.org/W3101140821","https://openalex.org/W3183153947","https://openalex.org/W4249319664","https://openalex.org/W4294170691","https://openalex.org/W4298857951","https://openalex.org/W6601894380","https://openalex.org/W6636510571","https://openalex.org/W6680300913","https://openalex.org/W6682691769","https://openalex.org/W6691746754","https://openalex.org/W6732046901","https://openalex.org/W6742672929","https://openalex.org/W6743359967","https://openalex.org/W6745159025","https://openalex.org/W6748507051","https://openalex.org/W6762569599","https://openalex.org/W6769361421","https://openalex.org/W6798472456"],"related_works":["https://openalex.org/W4237628466","https://openalex.org/W3112474074","https://openalex.org/W2735979742","https://openalex.org/W2362025288","https://openalex.org/W4388740574","https://openalex.org/W4384447452","https://openalex.org/W2971923188","https://openalex.org/W2889906500","https://openalex.org/W3137191661","https://openalex.org/W2035645574"],"abstract_inverted_index":{"Word":[0,13],"embedding":[1,14,44,79,144,212],"is":[2,41],"an":[3],"efficient":[4],"feature":[5],"representation":[6],"that":[7,20,32,63,107,182,187],"carries":[8],"semantic":[9,206],"and":[10,28,102,124,204,207],"syntactic":[11,208],"information.":[12],"works":[15],"as":[16,23,52,100],"a":[17,60,77,135,154,178],"word":[18,61,111],"level":[19],"treats":[21],"words":[22,31,50,128],"minor":[24,47],"independent":[25],"entity":[26],"units":[27],"cannot":[29],"handle":[30,202],"are":[33,55,188],"not":[34,122,189],"in":[35,66,88,97,110,129,198,210],"the":[36,56,67,115,119,147,159,164,175,196,199,211,215],"training":[37],"corpus.":[38],"One":[39],"solution":[40],"to":[42,75,141,146,173],"generate":[43,142],"from":[45],"more":[46],"parts":[48],"of":[49,59,70,149,166,177,214],"such":[51,99],"morphemes.":[53,168],"Morphemes":[54],"smallest":[57],"part":[58],"linguistic":[62],"has":[64,171],"meaning":[65],"grammatical":[68],"unit":[69],"languages.":[71],"This":[72,105,169],"study":[73,133,152,200],"aims":[74],"build":[76],"morpheme":[78,143],"model":[80,197],"for":[81,126],"Bahasa":[82],"Indonesia":[83],"(in":[84],"English:":[85],"Indonesian":[86],"Language)":[87],"sort:":[89],"Bahasa.":[90,130,150],"However,":[91],"there":[92],"were":[93,121],"many":[94],"morphological":[95],"rules":[96,109,120],"Bahasa,":[98],"inflectional":[101],"derivational":[103],"affixes.":[104],"implies":[106],"all":[108,127],"segmentation":[112,161],"will":[113],"increase":[114],"computational":[116],"complexity.":[117],"Moreover,":[118],"regular":[123],"similar":[125],"Therefore,":[131],"this":[132],"modified":[134],"Byte":[136],"Pair":[137],"Embedding":[138],"(BPE)":[139],"algorithm":[140,181],"appropriate":[145],"morphology":[148],"The":[151],"implemented":[153],"simple":[155],"method":[156],"by":[157],"filtering":[158],"BPE":[160,180],"results":[162],"with":[163],"list":[165],"Bahasa\u2019s":[167],"process":[170],"proven":[172],"anticipate":[174],"limitation":[176],"conventional":[179],"produces":[183],"intermediate":[184],"junk":[185],"tokens":[186],"meaningful.":[190],"Based":[191],"on":[192],"three":[193],"evaluation":[194],"scenarios,":[195],"can":[201],"OOV":[203],"carry":[205],"information":[209],"value":[213],"words.":[216]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
