{"id":"https://openalex.org/W4415969643","doi":"https://doi.org/10.1109/cist65886.2025.11224302","title":"Building a Machine Learning Classifier for Synonyms Validation in Moroccan Darija","display_name":"Building a Machine Learning Classifier for Synonyms Validation in Moroccan Darija","publication_year":2025,"publication_date":"2025-10-04","ids":{"openalex":"https://openalex.org/W4415969643","doi":"https://doi.org/10.1109/cist65886.2025.11224302"},"language":null,"primary_location":{"id":"doi:10.1109/cist65886.2025.11224302","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cist65886.2025.11224302","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE 8th Congress on Information Science and Technology (CiSt)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5120283884","display_name":"Said Belbachir","orcid":null},"institutions":[{"id":"https://openalex.org/I240042149","display_name":"Abdelmalek Essa\u00e2di University","ror":"https://ror.org/03c4shz64","country_code":"MA","type":"education","lineage":["https://openalex.org/I240042149"]}],"countries":["MA"],"is_corresponding":false,"raw_author_name":"Said Belbachir","raw_affiliation_strings":["Faculty of Sciences, Abdelmalek Essaadi University,New Technology Trends for Innovation Laboratory,Tetouan,Morocco"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Faculty of Sciences, Abdelmalek Essaadi University,New Technology Trends for Innovation Laboratory,Tetouan,Morocco","institution_ids":["https://openalex.org/I240042149"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009123662","display_name":"Ouafae Nahli","orcid":"https://orcid.org/0000-0003-3147-7082"},"institutions":[{"id":"https://openalex.org/I4210124522","display_name":"Institute for Computational Linguistics \u201cA. Zampolli\u201d","ror":"https://ror.org/028g3pe33","country_code":"IT","type":"facility","lineage":["https://openalex.org/I4210124522","https://openalex.org/I4210155236"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Ouafae Nahli","raw_affiliation_strings":["Consiglio Nazionale Delle Ricerche,Instituto di Linguistica Computazionale,Pisa,Italy"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Consiglio Nazionale Delle Ricerche,Instituto di Linguistica Computazionale,Pisa,Italy","institution_ids":["https://openalex.org/I4210124522"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085525274","display_name":"Mohammed El Mohajir","orcid":"https://orcid.org/0000-0001-6602-0168"},"institutions":[{"id":"https://openalex.org/I240042149","display_name":"Abdelmalek Essa\u00e2di University","ror":"https://ror.org/03c4shz64","country_code":"MA","type":"education","lineage":["https://openalex.org/I240042149"]}],"countries":["MA"],"is_corresponding":false,"raw_author_name":"Mohammed El Mohajir","raw_affiliation_strings":["Faculty of Sciences, Abdelmalek Essaadi University,New Technology Trends for Innovation Laboratory,Tetouan,Morocco"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Faculty of Sciences, Abdelmalek Essaadi University,New Technology Trends for Innovation Laboratory,Tetouan,Morocco","institution_ids":["https://openalex.org/I240042149"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5035441812","display_name":"Mohamed Chahhou","orcid":null},"institutions":[{"id":"https://openalex.org/I240042149","display_name":"Abdelmalek Essa\u00e2di University","ror":"https://ror.org/03c4shz64","country_code":"MA","type":"education","lineage":["https://openalex.org/I240042149"]}],"countries":["MA"],"is_corresponding":false,"raw_author_name":"Mohamed Chahhou","raw_affiliation_strings":["Faculty of Sciences, Abdelmalek Essaadi University,New Technology Trends for Innovation Laboratory,Tetouan,Morocco"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Faculty of Sciences, Abdelmalek Essaadi University,New Technology Trends for Innovation Laboratory,Tetouan,Morocco","institution_ids":["https://openalex.org/I240042149"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.15411179,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"80","last_page":"87"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.3917999863624573,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.3917999863624573,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.2994999885559082,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.030899999663233757,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/wordnet","display_name":"WordNet","score":0.7957000136375427},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.588699996471405},{"id":"https://openalex.org/keywords/random-forest","display_name":"Random forest","score":0.4959000051021576},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.47620001435279846},{"id":"https://openalex.org/keywords/arabic","display_name":"Arabic","score":0.38589999079704285},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.35030001401901245}],"concepts":[{"id":"https://openalex.org/C157659113","wikidata":"https://www.wikidata.org/wiki/Q533822","display_name":"WordNet","level":2,"score":0.7957000136375427},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7817000150680542},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7462999820709229},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.605400025844574},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.588699996471405},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5181000232696533},{"id":"https://openalex.org/C169258074","wikidata":"https://www.wikidata.org/wiki/Q245748","display_name":"Random forest","level":2,"score":0.4959000051021576},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.47620001435279846},{"id":"https://openalex.org/C96455323","wikidata":"https://www.wikidata.org/wiki/Q13955","display_name":"Arabic","level":2,"score":0.38589999079704285},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.35030001401901245},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.3138999938964844},{"id":"https://openalex.org/C130318100","wikidata":"https://www.wikidata.org/wiki/Q2268914","display_name":"Semantic similarity","level":2,"score":0.2928999960422516},{"id":"https://openalex.org/C155092808","wikidata":"https://www.wikidata.org/wiki/Q182557","display_name":"Computational linguistics","level":2,"score":0.2892000079154968},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.2872999906539917},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.26429998874664307}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cist65886.2025.11224302","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cist65886.2025.11224302","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE 8th Congress on Information Science and Technology (CiSt)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":16,"referenced_works":["https://openalex.org/W3121563664","https://openalex.org/W3137180236","https://openalex.org/W3177235262","https://openalex.org/W3212135906","https://openalex.org/W4226327328","https://openalex.org/W4229051452","https://openalex.org/W4230097545","https://openalex.org/W4245088086","https://openalex.org/W4251372957","https://openalex.org/W4283641397","https://openalex.org/W4289521457","https://openalex.org/W4317902431","https://openalex.org/W4372079617","https://openalex.org/W4383265687","https://openalex.org/W4387164728","https://openalex.org/W4402670856"],"related_works":[],"abstract_inverted_index":{"Building":[0],"lexical":[1,31,74,157],"resources":[2],"for":[3,45,116,160],"low-resource":[4],"languages,":[5],"such":[6],"as":[7,85],"Arabic":[8],"dialects,":[9],"remains":[10],"a":[11,41],"challenging":[12],"yet":[13],"essential":[14],"endeavor.":[15],"One":[16],"major":[17],"difficulty":[18],"lies":[19],"in":[20,155],"the":[21,60,64,111,129,132,136,150],"reliable":[22],"identification":[23],"of":[24,63,131,142,147,152],"appropriate":[25,113],"synonyms,":[26],"which":[27],"requires":[28],"both":[29],"rich":[30],"data":[32],"and":[33,75,83,102,144],"robust":[34],"machine":[35],"learning":[36],"techniques.":[37],"This":[38],"study":[39],"presents":[40],"synset":[42,115],"classification":[43,89],"framework":[44],"Darija":[46,118],"(Moroccan":[47],"Arabic),":[48],"leveraging":[49],"contextual":[50,69],"embeddings":[51,154],"derived":[52],"from":[53],"multiple":[54],"Transformer-based":[55,153],"language":[56],"models":[57,106],"to":[58,68,87,109],"capture":[59],"semantic":[61],"richness":[62],"dialect.":[65],"In":[66],"addition":[67],"similarity,":[70],"we":[71],"automatically":[72],"extract":[73],"ontological":[76],"similarity":[77],"features.":[78],"These":[79],"features":[80],"are":[81],"combined":[82],"used":[84],"input":[86],"supervised":[88],"algorithms.":[90],"Several":[91],"classifiers":[92],"were":[93,107],"evaluated,":[94],"including":[95],"Logistic":[96],"Regression,":[97],"Random":[98],"Forest,":[99],"Decision":[100],"Tree,":[101],"Gradient":[103],"Boosting.":[104],"The":[105],"trained":[108],"predict":[110],"most":[112],"WordNet":[114],"each":[117],"word,":[119],"with":[120,135],"performance":[121],"assessed":[122],"through":[123],"k-fold":[124],"cross-validation.":[125],"Experimental":[126],"results":[127],"confirm":[128],"effectiveness":[130],"proposed":[133],"approach,":[134],"best-performing":[137],"model":[138],"achieving":[139],"an":[140,145],"accuracy":[141],"73.28%":[143],"F1-score":[146],"84.81%,":[148],"underscoring":[149],"potential":[151],"advancing":[156],"resource":[158],"development":[159],"under-resourced":[161],"languages.":[162]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-11-06T00:00:00"}
