{"id":"https://openalex.org/W4416127052","doi":"https://doi.org/10.3390/make7040142","title":"Extreme Multi-Label Text Classification for Less-Represented Languages and Low-Resource Environments: Advances and Lessons Learned","display_name":"Extreme Multi-Label Text Classification for Less-Represented Languages and Low-Resource Environments: Advances and Lessons Learned","publication_year":2025,"publication_date":"2025-11-11","ids":{"openalex":"https://openalex.org/W4416127052","doi":"https://doi.org/10.3390/make7040142"},"language":"en","primary_location":{"id":"doi:10.3390/make7040142","is_oa":true,"landing_page_url":"https://doi.org/10.3390/make7040142","pdf_url":"https://www.mdpi.com/2504-4990/7/4/142/pdf?version=1762858739","source":{"id":"https://openalex.org/S4210213891","display_name":"Machine Learning and Knowledge Extraction","issn_l":"2504-4990","issn":["2504-4990"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning and Knowledge Extraction","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/2504-4990/7/4/142/pdf?version=1762858739","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5092831441","display_name":"Nikola Iva\u010di\u010d","orcid":"https://orcid.org/0009-0008-8016-9530"},"institutions":[{"id":"https://openalex.org/I3006985408","display_name":"Jo\u017eef Stefan Institute","ror":"https://ror.org/05060sz93","country_code":"SI","type":"facility","lineage":["https://openalex.org/I3006985408"]},{"id":"https://openalex.org/I4210113529","display_name":"Jo\u017eef Stefan International Postgraduate School","ror":"https://ror.org/01hdkb925","country_code":"SI","type":"education","lineage":["https://openalex.org/I4210113529"]}],"countries":["SI"],"is_corresponding":true,"raw_author_name":"Nikola Iva\u010di\u010d","raw_affiliation_strings":["Department of Knowledge Technologies, Jo\u017eef Stefan Institute, Jamova Cesta 39, 1000 Ljubljana, Slovenia","Jo\u017eef Stefan International Postgraduate School, Jamova Cesta 39, 1000 Ljubljana, Slovenia"],"raw_orcid":"https://orcid.org/0009-0008-8016-9530","affiliations":[{"raw_affiliation_string":"Department of Knowledge Technologies, Jo\u017eef Stefan Institute, Jamova Cesta 39, 1000 Ljubljana, Slovenia","institution_ids":["https://openalex.org/I3006985408"]},{"raw_affiliation_string":"Jo\u017eef Stefan International Postgraduate School, Jamova Cesta 39, 1000 Ljubljana, Slovenia","institution_ids":["https://openalex.org/I4210113529"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007096285","display_name":"Bla\u017e \u0160krlj","orcid":"https://orcid.org/0000-0002-9916-8756"},"institutions":[{"id":"https://openalex.org/I3006985408","display_name":"Jo\u017eef Stefan Institute","ror":"https://ror.org/05060sz93","country_code":"SI","type":"facility","lineage":["https://openalex.org/I3006985408"]}],"countries":["SI"],"is_corresponding":false,"raw_author_name":"Bla\u017e \u0160krlj","raw_affiliation_strings":["Department of Knowledge Technologies, Jo\u017eef Stefan Institute, Jamova Cesta 39, 1000 Ljubljana, Slovenia"],"raw_orcid":"https://orcid.org/0000-0002-9916-8756","affiliations":[{"raw_affiliation_string":"Department of Knowledge Technologies, Jo\u017eef Stefan Institute, Jamova Cesta 39, 1000 Ljubljana, Slovenia","institution_ids":["https://openalex.org/I3006985408"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027608856","display_name":"Boshko Koloski","orcid":"https://orcid.org/0000-0002-7330-0579"},"institutions":[{"id":"https://openalex.org/I3006985408","display_name":"Jo\u017eef Stefan Institute","ror":"https://ror.org/05060sz93","country_code":"SI","type":"facility","lineage":["https://openalex.org/I3006985408"]},{"id":"https://openalex.org/I4210113529","display_name":"Jo\u017eef Stefan International Postgraduate School","ror":"https://ror.org/01hdkb925","country_code":"SI","type":"education","lineage":["https://openalex.org/I4210113529"]}],"countries":["SI"],"is_corresponding":false,"raw_author_name":"Boshko Koloski","raw_affiliation_strings":["Department of Knowledge Technologies, Jo\u017eef Stefan Institute, Jamova Cesta 39, 1000 Ljubljana, Slovenia","Jo\u017eef Stefan International Postgraduate School, Jamova Cesta 39, 1000 Ljubljana, Slovenia"],"raw_orcid":"https://orcid.org/0000-0002-7330-0579","affiliations":[{"raw_affiliation_string":"Department of Knowledge Technologies, Jo\u017eef Stefan Institute, Jamova Cesta 39, 1000 Ljubljana, Slovenia","institution_ids":["https://openalex.org/I3006985408"]},{"raw_affiliation_string":"Jo\u017eef Stefan International Postgraduate School, Jamova Cesta 39, 1000 Ljubljana, Slovenia","institution_ids":["https://openalex.org/I4210113529"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074881863","display_name":"Senja Pollak","orcid":"https://orcid.org/0000-0002-4380-0863"},"institutions":[{"id":"https://openalex.org/I3006985408","display_name":"Jo\u017eef Stefan Institute","ror":"https://ror.org/05060sz93","country_code":"SI","type":"facility","lineage":["https://openalex.org/I3006985408"]},{"id":"https://openalex.org/I4210113529","display_name":"Jo\u017eef Stefan International Postgraduate School","ror":"https://ror.org/01hdkb925","country_code":"SI","type":"education","lineage":["https://openalex.org/I4210113529"]}],"countries":["SI"],"is_corresponding":false,"raw_author_name":"Senja Pollak","raw_affiliation_strings":["Department of Knowledge Technologies, Jo\u017eef Stefan Institute, Jamova Cesta 39, 1000 Ljubljana, Slovenia","Jo\u017eef Stefan International Postgraduate School, Jamova Cesta 39, 1000 Ljubljana, Slovenia"],"raw_orcid":"https://orcid.org/0000-0002-4380-0863","affiliations":[{"raw_affiliation_string":"Department of Knowledge Technologies, Jo\u017eef Stefan Institute, Jamova Cesta 39, 1000 Ljubljana, Slovenia","institution_ids":["https://openalex.org/I3006985408"]},{"raw_affiliation_string":"Jo\u017eef Stefan International Postgraduate School, Jamova Cesta 39, 1000 Ljubljana, Slovenia","institution_ids":["https://openalex.org/I4210113529"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060174190","display_name":"Nada Lavra\u010d","orcid":"https://orcid.org/0000-0002-9995-7093"},"institutions":[{"id":"https://openalex.org/I153976015","display_name":"University of Ljubljana","ror":"https://ror.org/05njb9z20","country_code":"SI","type":"education","lineage":["https://openalex.org/I153976015"]},{"id":"https://openalex.org/I3006985408","display_name":"Jo\u017eef Stefan Institute","ror":"https://ror.org/05060sz93","country_code":"SI","type":"facility","lineage":["https://openalex.org/I3006985408"]}],"countries":["SI"],"is_corresponding":false,"raw_author_name":"Nada Lavra\u010d","raw_affiliation_strings":["Department of Knowledge Technologies, Jo\u017eef Stefan Institute, Jamova Cesta 39, 1000 Ljubljana, Slovenia","Faculty of Computer and Information Science, University of Ljubljana, Ve\u010dna pot 113, 1000 Ljubljana, Slovenia"],"raw_orcid":"https://orcid.org/0000-0002-9995-7093","affiliations":[{"raw_affiliation_string":"Department of Knowledge Technologies, Jo\u017eef Stefan Institute, Jamova Cesta 39, 1000 Ljubljana, Slovenia","institution_ids":["https://openalex.org/I3006985408"]},{"raw_affiliation_string":"Faculty of Computer and Information Science, University of Ljubljana, Ve\u010dna pot 113, 1000 Ljubljana, Slovenia","institution_ids":["https://openalex.org/I153976015"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5047970122","display_name":"Matthew Purver","orcid":"https://orcid.org/0000-0003-2297-1273"},"institutions":[{"id":"https://openalex.org/I166337079","display_name":"Queen Mary University of London","ror":"https://ror.org/026zzn846","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I166337079"]},{"id":"https://openalex.org/I3006985408","display_name":"Jo\u017eef Stefan Institute","ror":"https://ror.org/05060sz93","country_code":"SI","type":"facility","lineage":["https://openalex.org/I3006985408"]}],"countries":["GB","SI"],"is_corresponding":false,"raw_author_name":"Matthew Purver","raw_affiliation_strings":["Department of Knowledge Technologies, Jo\u017eef Stefan Institute, Jamova Cesta 39, 1000 Ljubljana, Slovenia","School of Electronic Engineering and Computer Science, Queen Mary University of London, London E1 4NS, UK"],"raw_orcid":"https://orcid.org/0000-0003-2297-1273","affiliations":[{"raw_affiliation_string":"Department of Knowledge Technologies, Jo\u017eef Stefan Institute, Jamova Cesta 39, 1000 Ljubljana, Slovenia","institution_ids":["https://openalex.org/I3006985408"]},{"raw_affiliation_string":"School of Electronic Engineering and Computer Science, Queen Mary University of London, London E1 4NS, UK","institution_ids":["https://openalex.org/I166337079"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5092831441"],"corresponding_institution_ids":["https://openalex.org/I3006985408","https://openalex.org/I4210113529"],"apc_list":{"value":1400,"currency":"CHF","value_usd":1515},"apc_paid":{"value":1400,"currency":"CHF","value_usd":1515},"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.16707618,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"7","issue":"4","first_page":"142","last_page":"142"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9333999752998352,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9333999752998352,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10664","display_name":"Sentiment Analysis and Opinion Mining","score":0.022700000554323196,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12262","display_name":"Hate Speech and Cyberbullying Detection","score":0.011099999770522118,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/adaptability","display_name":"Adaptability","score":0.5784000158309937},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.47609999775886536},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.43149998784065247},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.4311999976634979},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4178999960422516},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.41690000891685486}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7906000018119812},{"id":"https://openalex.org/C177606310","wikidata":"https://www.wikidata.org/wiki/Q5674297","display_name":"Adaptability","level":2,"score":0.5784000158309937},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5573999881744385},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.47609999775886536},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4399000108242035},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.43149998784065247},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.4311999976634979},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4178999960422516},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.41690000891685486},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.40950000286102295},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.34439998865127563},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.33070001006126404},{"id":"https://openalex.org/C2776434776","wikidata":"https://www.wikidata.org/wiki/Q19246213","display_name":"Domain adaptation","level":3,"score":0.32260000705718994},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.2703999876976013},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.25949999690055847},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.258899986743927}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.3390/make7040142","is_oa":true,"landing_page_url":"https://doi.org/10.3390/make7040142","pdf_url":"https://www.mdpi.com/2504-4990/7/4/142/pdf?version=1762858739","source":{"id":"https://openalex.org/S4210213891","display_name":"Machine Learning and Knowledge Extraction","issn_l":"2504-4990","issn":["2504-4990"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning and Knowledge Extraction","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:84248e88274a46bd9cf060d53594a003","is_oa":true,"landing_page_url":"https://doaj.org/article/84248e88274a46bd9cf060d53594a003","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Machine Learning and Knowledge Extraction, Vol 7, Iss 4, p 142 (2025)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.3390/make7040142","is_oa":true,"landing_page_url":"https://doi.org/10.3390/make7040142","pdf_url":"https://www.mdpi.com/2504-4990/7/4/142/pdf?version=1762858739","source":{"id":"https://openalex.org/S4210213891","display_name":"Machine Learning and Knowledge Extraction","issn_l":"2504-4990","issn":["2504-4990"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning and Knowledge Extraction","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2844273342","display_name":null,"funder_award_id":"L2\u201150070","funder_id":"https://openalex.org/F4320322554","funder_display_name":"Javna Agencija za Raziskovalno Dejavnost RS"},{"id":"https://openalex.org/G3537734062","display_name":null,"funder_award_id":"PR-12394","funder_id":"https://openalex.org/F4320322554","funder_display_name":"Javna Agencija za Raziskovalno Dejavnost RS"},{"id":"https://openalex.org/G7530123626","display_name":null,"funder_award_id":"P2-0103","funder_id":"https://openalex.org/F4320322554","funder_display_name":"Javna Agencija za Raziskovalno Dejavnost RS"},{"id":"https://openalex.org/G976615299","display_name":null,"funder_award_id":"GC-0002","funder_id":"https://openalex.org/F4320322554","funder_display_name":"Javna Agencija za Raziskovalno Dejavnost RS"}],"funders":[{"id":"https://openalex.org/F4320322554","display_name":"Javna Agencija za Raziskovalno Dejavnost RS","ror":"https://ror.org/059bp8k51"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4416127052.pdf","grobid_xml":"https://content.openalex.org/works/W4416127052.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Amid":[0],"ongoing":[1],"efforts":[2],"to":[3,37,54,74,84,116,134],"develop":[4],"extremely":[5],"large,":[6],"multimodal":[7],"models,":[8,176],"there":[9],"is":[10],"increasing":[11],"interest":[12],"in":[13,44,178,195],"efficient":[14,128],"Small":[15],"Language":[16],"Models":[17],"(SLMs)":[18],"that":[19,56,147,162],"can":[20],"operate":[21],"without":[22,183],"reliance":[23],"on":[24,98,151],"large":[25],"data-centre":[26],"infrastructure.":[27],"However,":[28],"recent":[29],"SLMs":[30],"(e.g.,":[31],"LLaMA":[32],"or":[33,156],"Phi)":[34],"with":[35,94,111,126,204],"up":[36,115],"three":[38],"billion":[39],"parameters":[40],"are":[41],"predominantly":[42],"trained":[43],"high-resource":[45],"languages,":[46,100],"such":[47,101],"as":[48,102],"English,":[49],"which":[50,123],"limits":[51],"their":[52],"applicability":[53],"industries":[55],"require":[57],"robust":[58],"NLP":[59],"solutions":[60],"for":[61,89],"less-represented":[62,99],"languages":[63],"and":[64,72,138],"low-resource":[65],"settings,":[66],"particularly":[67,177],"those":[68],"requiring":[69,184],"low":[70],"latency":[71],"adaptability":[73],"evolving":[75],"label":[76,140,181],"spaces.":[77],"This":[78,104],"paper":[79],"examines":[80],"a":[81,90,95,144,152],"retrieval-based":[82],"approach":[83,146,194],"multi-label":[85,167],"text":[86,168],"classification":[87,169],"(MLC)":[88],"media":[91],"monitoring":[92],"dataset,":[93],"particular":[96],"focus":[97],"Slovene.":[103],"dataset":[105],"presents":[106],"an":[107],"extreme":[108,166],"MLC":[109],"challenge,":[110],"instances":[112],"labelled":[113],"using":[114],"twelve":[117],"thousand":[118],"categories.":[119],"The":[120],"proposed":[121],"method,":[122],"combines":[124],"retrieval":[125],"computationally":[127],"prediction,":[129],"effectively":[130],"addresses":[131],"challenges":[132],"related":[133],"multilinguality,":[135],"resource":[136],"constraints,":[137],"frequent":[139],"changes.":[141],"We":[142],"adopt":[143],"model-agnostic":[145],"does":[148],"not":[149],"rely":[150],"specific":[153],"model":[154],"architecture":[155],"language":[157],"selection.":[158],"Our":[159],"results":[160],"demonstrate":[161],"techniques":[163],"from":[164],"the":[165,190],"(XMC)":[170],"domain":[171],"outperform":[172],"traditional":[173],"Transformer-based":[174],"encoder":[175],"handling":[179],"dynamic":[180],"spaces":[182],"continuous":[185],"fine-tuning.":[186],"Additionally,":[187],"we":[188],"highlight":[189],"effectiveness":[191],"of":[192],"this":[193],"scenarios":[196],"involving":[197],"rare":[198],"labels,":[199],"where":[200],"baseline":[201],"models":[202],"struggle":[203],"generalisation.":[205]},"counts_by_year":[],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-11-11T00:00:00"}
