{"id":"https://openalex.org/W4327967184","doi":"https://doi.org/10.3390/info14030195","title":"Learned Text Representation for Amharic Information Retrieval and Natural Language Processing","display_name":"Learned Text Representation for Amharic Information Retrieval and Natural Language Processing","publication_year":2023,"publication_date":"2023-03-20","ids":{"openalex":"https://openalex.org/W4327967184","doi":"https://doi.org/10.3390/info14030195"},"language":"en","primary_location":{"id":"doi:10.3390/info14030195","is_oa":true,"landing_page_url":"https://doi.org/10.3390/info14030195","pdf_url":"https://www.mdpi.com/2078-2489/14/3/195/pdf?version=1679305861","source":{"id":"https://openalex.org/S4210219776","display_name":"Information","issn_l":"2078-2489","issn":["2078-2489"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Information","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/2078-2489/14/3/195/pdf?version=1679305861","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5088171082","display_name":"Tilahun Yeshambel","orcid":"https://orcid.org/0000-0003-0599-262X"},"institutions":[{"id":"https://openalex.org/I4537092","display_name":"Addis Ababa University","ror":"https://ror.org/038b8e254","country_code":"ET","type":"education","lineage":["https://openalex.org/I4537092"]}],"countries":["ET"],"is_corresponding":false,"raw_author_name":"Tilahun Yeshambel","raw_affiliation_strings":["IT Doctorial Program, Addis Ababa University, Addis Ababa P.O. Box 1176, Ethiopia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IT Doctorial Program, Addis Ababa University, Addis Ababa P.O. Box 1176, Ethiopia","institution_ids":["https://openalex.org/I4537092"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035274820","display_name":"Josiane Mothe","orcid":"https://orcid.org/0000-0001-9273-2193"},"institutions":[{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"government","lineage":["https://openalex.org/I1294671590"]},{"id":"https://openalex.org/I134560555","display_name":"Universit\u00e9 Toulouse III - Paul Sabatier","ror":"https://ror.org/02v6kpv12","country_code":"FR","type":"education","lineage":["https://openalex.org/I134560555"]},{"id":"https://openalex.org/I3131550300","display_name":"Universit\u00e9 Toulouse-I-Capitole","ror":"https://ror.org/0443n9e75","country_code":"FR","type":"education","lineage":["https://openalex.org/I3131550300"]},{"id":"https://openalex.org/I4210119061","display_name":"Institut de Recherche en Informatique de Toulouse","ror":"https://ror.org/01rx4qw44","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I205747304","https://openalex.org/I205747304","https://openalex.org/I4210119061","https://openalex.org/I4210152422","https://openalex.org/I4387153255","https://openalex.org/I4405258862","https://openalex.org/I4405259414"]},{"id":"https://openalex.org/I4210152422","display_name":"Universit\u00e9 Toulouse - Jean Jaur\u00e8s","ror":"https://ror.org/04ezk3x31","country_code":"FR","type":"education","lineage":["https://openalex.org/I4210152422"]},{"id":"https://openalex.org/I4210160189","display_name":"Institut Polytechnique de Bordeaux","ror":"https://ror.org/054qv7y42","country_code":"FR","type":"education","lineage":["https://openalex.org/I4210160189"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Josiane Mothe","raw_affiliation_strings":["Componsante INSPE, IRIT, UMR5505 CNRS, Universit\u00e9 de Toulouse Jean-Jaur\u00e8s, 118 Rte de Narbonne, F31400 Toulouse, France"],"raw_orcid":"https://orcid.org/0000-0001-9273-2193","affiliations":[{"raw_affiliation_string":"Componsante INSPE, IRIT, UMR5505 CNRS, Universit\u00e9 de Toulouse Jean-Jaur\u00e8s, 118 Rte de Narbonne, F31400 Toulouse, France","institution_ids":["https://openalex.org/I4210152422","https://openalex.org/I134560555","https://openalex.org/I3131550300","https://openalex.org/I4210160189","https://openalex.org/I1294671590","https://openalex.org/I4210119061"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5010912481","display_name":"Yaregal Assabie","orcid":"https://orcid.org/0000-0001-7591-9298"},"institutions":[{"id":"https://openalex.org/I4537092","display_name":"Addis Ababa University","ror":"https://ror.org/038b8e254","country_code":"ET","type":"education","lineage":["https://openalex.org/I4537092"]}],"countries":["ET"],"is_corresponding":false,"raw_author_name":"Yaregal Assabie","raw_affiliation_strings":["Department of Computer Science, Addis Ababa University, Addis Ababa P.O. Box 1176, Ethiopia"],"raw_orcid":"https://orcid.org/0000-0001-7591-9298","affiliations":[{"raw_affiliation_string":"Department of Computer Science, Addis Ababa University, Addis Ababa P.O. Box 1176, Ethiopia","institution_ids":["https://openalex.org/I4537092"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":{"value":1400,"currency":"CHF","value_usd":1515},"apc_paid":{"value":1600,"currency":"EUR","value_usd":1725},"fwci":2.6106,"has_fulltext":false,"cited_by_count":16,"citation_normalized_percentile":{"value":0.91505596,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":"14","issue":"3","first_page":"195","last_page":"195"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/amharic","display_name":"Amharic","score":0.8885178565979004},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8506010174751282},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.8074382543563843},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7446027994155884},{"id":"https://openalex.org/keywords/word2vec","display_name":"Word2vec","score":0.5880657434463501},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.515893816947937},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4482708275318146},{"id":"https://openalex.org/keywords/lemmatisation","display_name":"Lemmatisation","score":0.41798335313796997},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.41066649556159973},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.12817972898483276}],"concepts":[{"id":"https://openalex.org/C2780900699","wikidata":"https://www.wikidata.org/wiki/Q28244","display_name":"Amharic","level":2,"score":0.8885178565979004},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8506010174751282},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.8074382543563843},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7446027994155884},{"id":"https://openalex.org/C2776461190","wikidata":"https://www.wikidata.org/wiki/Q22673982","display_name":"Word2vec","level":3,"score":0.5880657434463501},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.515893816947937},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4482708275318146},{"id":"https://openalex.org/C161831844","wikidata":"https://www.wikidata.org/wiki/Q2554325","display_name":"Lemmatisation","level":2,"score":0.41798335313796997},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.41066649556159973},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.12817972898483276},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.3390/info14030195","is_oa":true,"landing_page_url":"https://doi.org/10.3390/info14030195","pdf_url":"https://www.mdpi.com/2078-2489/14/3/195/pdf?version=1679305861","source":{"id":"https://openalex.org/S4210219776","display_name":"Information","issn_l":"2078-2489","issn":["2078-2489"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Information","raw_type":"journal-article"},{"id":"pmh:oai:HAL:hal-04229854v1","is_oa":true,"landing_page_url":"https://hal.science/hal-04229854","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"https://www.mdpi.com/2078-2489/14/3/195","raw_type":"Journal articles"},{"id":"pmh:oai:doaj.org/article:ef4bf60cec9a4545aa757c1d8cb52bca","is_oa":true,"landing_page_url":"https://doaj.org/article/ef4bf60cec9a4545aa757c1d8cb52bca","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Information, Vol 14, Iss 3, p 195 (2023)","raw_type":"article"},{"id":"pmh:oai:mdpi.com:/2078-2489/14/3/195/","is_oa":true,"landing_page_url":"https://dx.doi.org/10.3390/info14030195","pdf_url":null,"source":{"id":"https://openalex.org/S4306400947","display_name":"MDPI (MDPI AG)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210097602","host_organization_name":"Multidisciplinary Digital Publishing Institute (Switzerland)","host_organization_lineage":["https://openalex.org/I4210097602"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Information; Volume 14; Issue 3; Pages: 195","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.3390/info14030195","is_oa":true,"landing_page_url":"https://doi.org/10.3390/info14030195","pdf_url":"https://www.mdpi.com/2078-2489/14/3/195/pdf?version=1679305861","source":{"id":"https://openalex.org/S4210219776","display_name":"Information","issn_l":"2078-2489","issn":["2078-2489"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Information","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.8399999737739563}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4327967184.pdf","grobid_xml":"https://content.openalex.org/works/W4327967184.grobid-xml"},"referenced_works_count":53,"referenced_works":["https://openalex.org/W2071761836","https://openalex.org/W2077428231","https://openalex.org/W2118020653","https://openalex.org/W2131744502","https://openalex.org/W2153579005","https://openalex.org/W2158139315","https://openalex.org/W2250539671","https://openalex.org/W2252143362","https://openalex.org/W2405884322","https://openalex.org/W2748807259","https://openalex.org/W2786464815","https://openalex.org/W2792958857","https://openalex.org/W2864258299","https://openalex.org/W2896457183","https://openalex.org/W2952370363","https://openalex.org/W2954264361","https://openalex.org/W2962839482","https://openalex.org/W2970474271","https://openalex.org/W2986154550","https://openalex.org/W2990188683","https://openalex.org/W3008110149","https://openalex.org/W3015935667","https://openalex.org/W3035390927","https://openalex.org/W3083321524","https://openalex.org/W3086559106","https://openalex.org/W3092786496","https://openalex.org/W3092952717","https://openalex.org/W3096274078","https://openalex.org/W3101058639","https://openalex.org/W3103187652","https://openalex.org/W3105220303","https://openalex.org/W3153221513","https://openalex.org/W3184486121","https://openalex.org/W3184620921","https://openalex.org/W3185319288","https://openalex.org/W3196876174","https://openalex.org/W3197298549","https://openalex.org/W3198659451","https://openalex.org/W3208076653","https://openalex.org/W3213215942","https://openalex.org/W4210385506","https://openalex.org/W4212902066","https://openalex.org/W4226112897","https://openalex.org/W4254150231","https://openalex.org/W4281689302","https://openalex.org/W6621906925","https://openalex.org/W6631190155","https://openalex.org/W6668432481","https://openalex.org/W6679915538","https://openalex.org/W6684468177","https://openalex.org/W6748304040","https://openalex.org/W6763238093","https://openalex.org/W6775772599"],"related_works":["https://openalex.org/W2071761836","https://openalex.org/W2068668614","https://openalex.org/W3121919928","https://openalex.org/W584933867","https://openalex.org/W2021956231","https://openalex.org/W4386930358","https://openalex.org/W1972048371","https://openalex.org/W4287822602","https://openalex.org/W589849612","https://openalex.org/W3014460680"],"abstract_inverted_index":{"Over":[0],"the":[1,41,58,90,111,127,132,143,188],"past":[2],"few":[3],"years,":[4],"word":[5,103,117,138,191,224],"embeddings":[6,104,192,225],"and":[7,28,70,99,105,122,158,173,193,199,209,216,220],"bidirectional":[8],"encoder":[9],"representations":[10,22,176],"from":[11],"transformers":[12],"(BERT)":[13],"models":[14,50,75,195],"have":[15],"brought":[16],"better":[17,213],"solutions":[18],"to":[19,40,65],"learning":[20],"text":[21,37,79,94,159,175,218],"for":[23,51,60,76,96,116,151,179],"natural":[24],"language":[25,49,107,153,210],"processing":[26,80],"(NLP)":[27],"other":[29,223],"tasks.":[30,161],"Many":[31],"NLP":[32,100],"applications":[33],"rely":[34],"on":[35,89,187,196,226],"pre-trained":[36,74,147],"representations,":[38,219],"leading":[39],"development":[42],"of":[43,46,92,134,145,190],"a":[44,67,146,183],"number":[45],"neural":[47],"network":[48],"various":[52],"languages.":[53],"However,":[54],"this":[55],"is":[56,63],"not":[57,82],"case":[59],"Amharic,":[61],"which":[62],"known":[64],"be":[66],"morphologically":[68],"complex":[69],"under-resourced":[71],"language.":[72],"Usable":[73],"automatic":[77],"Amharic":[78,148,162],"are":[81],"available.":[83],"This":[84],"paper":[85],"presents":[86],"an":[87],"investigation":[88],"essence":[91],"learned":[93],"representation":[95],"information":[97,165],"retrieval":[98,166],"tasks":[101],"using":[102,137],"BERT":[106,128,149,194],"models.":[108],"We":[109,130,140,181],"explored":[110],"most":[112],"commonly":[113],"used":[114,178],"methods":[115],"embeddings,":[118],"including":[119],"word2vec,":[120],"GloVe,":[121],"fastText,":[123],"as":[124,126],"well":[125],"model.":[129],"investigated":[131],"performance":[133],"query":[135,207],"expansion":[136,208],"embeddings.":[139],"also":[141],"analyzed":[142],"use":[144],"model":[150],"masked":[152],"modeling,":[154],"next":[155],"sentence":[156],"prediction,":[157],"classification":[160],"ad":[163],"hoc":[164],"test":[167],"collections":[168],"that":[169,205],"contain":[170],"word-based,":[171,197],"stem-based,":[172,198],"root-based":[174,200,217],"were":[177],"evaluation.":[180],"conducted":[182],"detailed":[184],"empirical":[185],"analysis":[186],"usability":[189],"corpora.":[201],"Experimental":[202],"results":[203],"show":[204],"word-based":[206,227],"modeling":[211],"perform":[212],"than":[214],"stem-based":[215],"fastText":[221],"outperforms":[222],"corpus.":[228]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":3}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2023-03-21T00:00:00"}
