{"id":"https://openalex.org/W4385363235","doi":"https://doi.org/10.1145/3610522","title":"DZ-SMS: An Authentic Corpus of Algerian SMS","display_name":"DZ-SMS: An Authentic Corpus of Algerian SMS","publication_year":2023,"publication_date":"2023-07-27","ids":{"openalex":"https://openalex.org/W4385363235","doi":"https://doi.org/10.1145/3610522"},"language":"en","primary_location":{"id":"doi:10.1145/3610522","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3610522","pdf_url":null,"source":{"id":"https://openalex.org/S4306421405","display_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","issn_l":"2375-4699","issn":["2375-4699","2375-4702"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5092564454","display_name":"Brahim Dahou","orcid":"https://orcid.org/0000-0002-6150-8903"},"institutions":[{"id":"https://openalex.org/I157169809","display_name":"University of Sciences and Technology Houari Boumediene","ror":"https://ror.org/02kb89c09","country_code":"DZ","type":"education","lineage":["https://openalex.org/I157169809"]}],"countries":["DZ"],"is_corresponding":true,"raw_author_name":"Brahim Dahou","raw_affiliation_strings":["University of Science and Technology Houari Boumediene, Algeria"],"raw_orcid":"https://orcid.org/0000-0002-6150-8903","affiliations":[{"raw_affiliation_string":"University of Science and Technology Houari Boumediene, Algeria","institution_ids":["https://openalex.org/I157169809"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029774217","display_name":"Leila Falek","orcid":"https://orcid.org/0000-0002-8719-8755"},"institutions":[{"id":"https://openalex.org/I157169809","display_name":"University of Sciences and Technology Houari Boumediene","ror":"https://ror.org/02kb89c09","country_code":"DZ","type":"education","lineage":["https://openalex.org/I157169809"]}],"countries":["DZ"],"is_corresponding":false,"raw_author_name":"Leila Falek","raw_affiliation_strings":["University of Science and Technology Houari Boumediene, Algeria"],"raw_orcid":"https://orcid.org/0000-0002-8719-8755","affiliations":[{"raw_affiliation_string":"University of Science and Technology Houari Boumediene, Algeria","institution_ids":["https://openalex.org/I157169809"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059139666","display_name":"Mourad Abbas","orcid":"https://orcid.org/0000-0002-8291-0862"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mourad Abbas","raw_affiliation_strings":["Computational Linguistics Department, CRSTDLA, Algeria"],"raw_orcid":"https://orcid.org/0000-0002-8291-0862","affiliations":[{"raw_affiliation_string":"Computational Linguistics Department, CRSTDLA, Algeria","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013474701","display_name":"Slimane Mekaoui","orcid":"https://orcid.org/0000-0002-9828-3779"},"institutions":[{"id":"https://openalex.org/I157169809","display_name":"University of Sciences and Technology Houari Boumediene","ror":"https://ror.org/02kb89c09","country_code":"DZ","type":"education","lineage":["https://openalex.org/I157169809"]}],"countries":["DZ"],"is_corresponding":false,"raw_author_name":"Slimane Mekaoui","raw_affiliation_strings":["University of Science and Technology Houari Boumediene, Algeria"],"raw_orcid":"https://orcid.org/0000-0002-9828-3779","affiliations":[{"raw_affiliation_string":"University of Science and Technology Houari Boumediene, Algeria","institution_ids":["https://openalex.org/I157169809"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085833879","display_name":"Mohamed Lichouri","orcid":"https://orcid.org/0000-0003-0584-1389"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mohamed Lichouri","raw_affiliation_strings":["Computational Linguistics Department, CRSTDLA, Algeria"],"raw_orcid":"https://orcid.org/0000-0003-0584-1389","affiliations":[{"raw_affiliation_string":"Computational Linguistics Department, CRSTDLA, Algeria","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5065802975","display_name":"Aicha Zitouni","orcid":"https://orcid.org/0000-0002-4311-9912"},"institutions":[{"id":"https://openalex.org/I157169809","display_name":"University of Sciences and Technology Houari Boumediene","ror":"https://ror.org/02kb89c09","country_code":"DZ","type":"education","lineage":["https://openalex.org/I157169809"]}],"countries":["DZ"],"is_corresponding":false,"raw_author_name":"Aicha Zitouni","raw_affiliation_strings":["University of Science and Technology Houari Boumediene, Algeria"],"raw_orcid":"https://orcid.org/0000-0002-4311-9912","affiliations":[{"raw_affiliation_string":"University of Science and Technology Houari Boumediene, Algeria","institution_ids":["https://openalex.org/I157169809"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5092564454"],"corresponding_institution_ids":["https://openalex.org/I157169809"],"apc_list":null,"apc_paid":null,"fwci":0.492,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.6539502,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":"22","issue":"8","first_page":"1","last_page":"21"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13155","display_name":"Digital Communication and Language","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13155","display_name":"Digital Communication and Language","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.9889000058174133,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9887999892234802,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5943857431411743},{"id":"https://openalex.org/keywords/short-message-service","display_name":"Short Message Service","score":0.5720099210739136},{"id":"https://openalex.org/keywords/realization","display_name":"Realization (probability)","score":0.5441104769706726},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5437922477722168},{"id":"https://openalex.org/keywords/oversampling","display_name":"Oversampling","score":0.5167814493179321},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5167219042778015},{"id":"https://openalex.org/keywords/lexical-analysis","display_name":"Lexical analysis","score":0.47059333324432373},{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.4515291452407837},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.44737541675567627},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.19463959336280823},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.15075764060020447}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5943857431411743},{"id":"https://openalex.org/C74558129","wikidata":"https://www.wikidata.org/wiki/Q43024","display_name":"Short Message Service","level":2,"score":0.5720099210739136},{"id":"https://openalex.org/C2781089630","wikidata":"https://www.wikidata.org/wiki/Q21856745","display_name":"Realization (probability)","level":2,"score":0.5441104769706726},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5437922477722168},{"id":"https://openalex.org/C197323446","wikidata":"https://www.wikidata.org/wiki/Q331222","display_name":"Oversampling","level":3,"score":0.5167814493179321},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5167219042778015},{"id":"https://openalex.org/C176982825","wikidata":"https://www.wikidata.org/wiki/Q835922","display_name":"Lexical analysis","level":2,"score":0.47059333324432373},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.4515291452407837},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.44737541675567627},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.19463959336280823},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.15075764060020447},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3610522","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3610522","pdf_url":null,"source":{"id":"https://openalex.org/S4306421405","display_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","issn_l":"2375-4699","issn":["2375-4699","2375-4702"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6700000166893005,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W1524281572","https://openalex.org/W1552638064","https://openalex.org/W2030575204","https://openalex.org/W2091411809","https://openalex.org/W2113855447","https://openalex.org/W2171923384","https://openalex.org/W2250212502","https://openalex.org/W2325227998","https://openalex.org/W2488866986","https://openalex.org/W2560280095","https://openalex.org/W2613000335","https://openalex.org/W2765947769","https://openalex.org/W2997591727","https://openalex.org/W3021308914","https://openalex.org/W3036800214","https://openalex.org/W3046137360","https://openalex.org/W3099138433","https://openalex.org/W4247909370"],"related_works":["https://openalex.org/W2355927362","https://openalex.org/W1516839994","https://openalex.org/W3126512388","https://openalex.org/W2010789233","https://openalex.org/W4382068721","https://openalex.org/W4239003064","https://openalex.org/W2805332480","https://openalex.org/W2789919619","https://openalex.org/W2101819884","https://openalex.org/W2351210157"],"abstract_inverted_index":{"In":[0],"this":[1],"article,":[2],"a":[3,7,76,98,261,283,312],"complete":[4],"methodology":[5],"of":[6,10,43,61,78,95,101,116,125,132,163,175,206,210,216,264,286,296,315],"corpus":[8],"realization":[9],"authentic":[11],"Short":[12],"Message":[13],"Service":[14],"(SMS)":[15],"from":[16,38,168],"Algerian":[17,316],"dialect":[18],"and":[19,47,53,74,122,153,192,233,242],"which":[20,291],"are":[21,106,293],"transcribed":[22],"in":[23,294],"Latin":[24],"characters":[25],"or":[26],"symbols":[27],"is":[28,185],"presented.":[29],"A":[30,172],"linguistic":[31],"material":[32],"constituted":[33],"by":[34,222,247,254,269,299],"6,000":[35],"SMS":[36],"coming":[37],"the":[39,79,109,114,117,120,123,126,130,133,161,169,176,181,189,207,245,250,256,267,272,278,297],"different":[40],"geographical":[41,54],"regions":[42,67],"Algeria":[44],"(Middle,":[45],"East,":[46],"West)":[48],"corresponding":[49],"to":[50,72,129,158,166,304],"42":[51],"administrative":[52,102],"departments,":[55],"have":[56,301],"been":[57,220],"collected.":[58],"The":[59,149,213,289],"coexistence":[60],"several":[62],"dialects":[63],"through":[64],"these":[65],"three":[66,89,136,141,224,306],"simultaneously":[68],"has":[69,87,178,195,201,219,259,280],"obliged":[70],"us":[71,303],"consider":[73],"operate":[75],"classification":[77,86,246,268,298],"data":[80,85,183],"for":[81],"each":[82,94],"dialect.":[83],"This":[84,237],"yielded":[88,282],"extracted":[90],"regional":[91,308],"dialectic":[92],"corpora,":[93,309],"them":[96],"covering":[97,311],"specific":[99],"number":[100,162,314],"departments.":[103,317],"These":[104],"treatments":[105],"based":[107,139],"on":[108,140],"so-called":[110],"Data-n-gram":[111],"tokenization":[112],"targeting":[113],"suppression":[115],"stop":[118],"words,":[119],"stemming":[121,193],"imbalance":[124,218],"classes":[127,217],"linked":[128],"nature":[131],"SMS.":[134],"Consequently,":[135],"text":[137],"classifiers":[138],"linear":[142],"classifiers,":[143],"namely,":[144],"Stochastic":[145],"Gradient":[146],"Descent":[147],"(SGD),":[148],"Ridge":[150],"Regression":[151],"(RDG),":[152],"Linear":[154],"Support":[155],"Vector":[156],"Machines,":[157],"find":[159],"out":[160],"significant":[164],"corpora":[165],"extract":[167],"collected":[170],"data.":[171],"deep":[173],"analysis":[174],"results":[177,241],"shown":[179],"that":[180,200],"5-grams":[182],"representation":[184],"more":[186],"representative":[187],"whereas":[188,266],"stop-words":[190],"removal":[191],"process":[194,252,274],"generated":[196],"an":[197,204],"information":[198],"loss":[199],"subsequently":[202],"inferred":[203],"alteration":[205],"recognition":[208],"rate":[209],"about":[211,287],"2%.":[212],"emerging":[214],"problem":[215],"treated":[221],"using":[223,255],"techniques:":[225],"Random":[226],"Oversampling,":[227],"Synthetic":[228,235],"Minorities":[229],"Oversampling":[230],"Technique":[231],"(SMOTE),":[232],"Adaptive":[234],"(ADASYN).":[236],"treatment":[238],"produced":[239],"interesting":[240],"enhancements;":[243],"particularly,":[244],"region":[248],"with":[249,271,277],"oversampling":[251,273],"SMOTE":[253],"RDG":[257],"technique":[258],"reached":[260],"better":[262],"percentage":[263],"55.93%":[265],"department":[270],"ADASYN":[275],"associated":[276],"SGD":[279],"only":[281],"maximum":[284],"score":[285],"17.11%.":[288],"results,":[290],"undoubtedly":[292],"favor":[295],"region,":[300],"compelled":[302],"create":[305],"Subdialectal":[307],"each,":[310],"certain":[313]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
