{"id":"https://openalex.org/W2957234659","doi":"https://doi.org/10.1145/3297278","title":"Automatic Diacritics Restoration for Tunisian Dialect","display_name":"Automatic Diacritics Restoration for Tunisian Dialect","publication_year":2019,"publication_date":"2019-07-12","ids":{"openalex":"https://openalex.org/W2957234659","doi":"https://doi.org/10.1145/3297278","mag":"2957234659"},"language":"en","primary_location":{"id":"doi:10.1145/3297278","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3297278","pdf_url":null,"source":{"id":"https://openalex.org/S4306421405","display_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","issn_l":"2375-4699","issn":["2375-4699","2375-4702"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103283837","display_name":"Abir Masmoudi","orcid":"https://orcid.org/0000-0002-7450-360X"},"institutions":[{"id":"https://openalex.org/I142899784","display_name":"University of Sfax","ror":"https://ror.org/04d4sd432","country_code":"TN","type":"education","lineage":["https://openalex.org/I142899784"]}],"countries":["TN"],"is_corresponding":true,"raw_author_name":"Abir Masmoudi","raw_affiliation_strings":["MIRACL Laboratory-University of Sfax, Tunisia"],"affiliations":[{"raw_affiliation_string":"MIRACL Laboratory-University of Sfax, Tunisia","institution_ids":["https://openalex.org/I142899784"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056172842","display_name":"Salima Mdhaffar","orcid":"https://orcid.org/0000-0002-8472-6890"},"institutions":[{"id":"https://openalex.org/I142899784","display_name":"University of Sfax","ror":"https://ror.org/04d4sd432","country_code":"TN","type":"education","lineage":["https://openalex.org/I142899784"]}],"countries":["TN"],"is_corresponding":false,"raw_author_name":"Salima Mdhaffar","raw_affiliation_strings":["MIRACL Laboratory-University of Sfax, Tunisia"],"affiliations":[{"raw_affiliation_string":"MIRACL Laboratory-University of Sfax, Tunisia","institution_ids":["https://openalex.org/I142899784"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077074796","display_name":"Rahma Sellami","orcid":null},"institutions":[{"id":"https://openalex.org/I142899784","display_name":"University of Sfax","ror":"https://ror.org/04d4sd432","country_code":"TN","type":"education","lineage":["https://openalex.org/I142899784"]}],"countries":["TN"],"is_corresponding":false,"raw_author_name":"Rahma Sellami","raw_affiliation_strings":["MIRACL Laboratory-University of Sfax, Tunisia"],"affiliations":[{"raw_affiliation_string":"MIRACL Laboratory-University of Sfax, Tunisia","institution_ids":["https://openalex.org/I142899784"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5001984598","display_name":"Lamia Hadrich Belguith","orcid":"https://orcid.org/0000-0002-4868-657X"},"institutions":[{"id":"https://openalex.org/I142899784","display_name":"University of Sfax","ror":"https://ror.org/04d4sd432","country_code":"TN","type":"education","lineage":["https://openalex.org/I142899784"]}],"countries":["TN"],"is_corresponding":false,"raw_author_name":"Lamia Hadrich Belguith","raw_affiliation_strings":["MIRACL Laboratory-University of Sfax, Tunisia"],"affiliations":[{"raw_affiliation_string":"MIRACL Laboratory-University of Sfax, Tunisia","institution_ids":["https://openalex.org/I142899784"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5103283837"],"corresponding_institution_ids":["https://openalex.org/I142899784"],"apc_list":null,"apc_paid":null,"fwci":1.5402,"has_fulltext":false,"cited_by_count":16,"citation_normalized_percentile":{"value":0.87309121,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"18","issue":"3","first_page":"1","last_page":"18"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9882000088691711,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13912","display_name":"Language, Linguistics, Cultural Analysis","score":0.9861000180244446,"subfield":{"id":"https://openalex.org/subfields/1203","display_name":"Language and Linguistics"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7629578113555908},{"id":"https://openalex.org/keywords/spelling","display_name":"Spelling","score":0.7100443243980408},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.7080209255218506},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6106461882591248},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.5809544324874878},{"id":"https://openalex.org/keywords/ambiguity","display_name":"Ambiguity","score":0.5160014629364014},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4636550843715668},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.4581555128097534},{"id":"https://openalex.org/keywords/orthography","display_name":"Orthography","score":0.4325958490371704},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.42938846349716187},{"id":"https://openalex.org/keywords/arabic","display_name":"Arabic","score":0.41581523418426514},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.07076510787010193}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7629578113555908},{"id":"https://openalex.org/C2777801307","wikidata":"https://www.wikidata.org/wiki/Q2088390","display_name":"Spelling","level":2,"score":0.7100443243980408},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.7080209255218506},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6106461882591248},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.5809544324874878},{"id":"https://openalex.org/C2780522230","wikidata":"https://www.wikidata.org/wiki/Q1140419","display_name":"Ambiguity","level":2,"score":0.5160014629364014},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4636550843715668},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.4581555128097534},{"id":"https://openalex.org/C150670947","wikidata":"https://www.wikidata.org/wiki/Q43091","display_name":"Orthography","level":3,"score":0.4325958490371704},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.42938846349716187},{"id":"https://openalex.org/C96455323","wikidata":"https://www.wikidata.org/wiki/Q13955","display_name":"Arabic","level":2,"score":0.41581523418426514},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.07076510787010193},{"id":"https://openalex.org/C554936623","wikidata":"https://www.wikidata.org/wiki/Q199657","display_name":"Reading (process)","level":2,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3297278","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3297278","pdf_url":null,"source":{"id":"https://openalex.org/S4306421405","display_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","issn_l":"2375-4699","issn":["2375-4699","2375-4702"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10","score":0.550000011920929}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":53,"referenced_works":["https://openalex.org/W45643337","https://openalex.org/W146945658","https://openalex.org/W187659528","https://openalex.org/W850051568","https://openalex.org/W1516184288","https://openalex.org/W1524281572","https://openalex.org/W1558613094","https://openalex.org/W1565703351","https://openalex.org/W1631260214","https://openalex.org/W1659740212","https://openalex.org/W1972404024","https://openalex.org/W2009639677","https://openalex.org/W2011402185","https://openalex.org/W2012804051","https://openalex.org/W2041375131","https://openalex.org/W2056382745","https://openalex.org/W2063116544","https://openalex.org/W2063718015","https://openalex.org/W2065400286","https://openalex.org/W2097497389","https://openalex.org/W2109613320","https://openalex.org/W2117202778","https://openalex.org/W2124807415","https://openalex.org/W2132283323","https://openalex.org/W2147880316","https://openalex.org/W2153186553","https://openalex.org/W2156985047","https://openalex.org/W2233440677","https://openalex.org/W2250414785","https://openalex.org/W2250751111","https://openalex.org/W2251114654","https://openalex.org/W2251190179","https://openalex.org/W2251658995","https://openalex.org/W2251945664","https://openalex.org/W2293965775","https://openalex.org/W2560475547","https://openalex.org/W2571613108","https://openalex.org/W2571755499","https://openalex.org/W2572792611","https://openalex.org/W2579409202","https://openalex.org/W2581564515","https://openalex.org/W2585813550","https://openalex.org/W2612649659","https://openalex.org/W2738254237","https://openalex.org/W2739702159","https://openalex.org/W2739711390","https://openalex.org/W2760231680","https://openalex.org/W2891232548","https://openalex.org/W2906891164","https://openalex.org/W2998215494","https://openalex.org/W3202110839","https://openalex.org/W4245193884","https://openalex.org/W4285719527"],"related_works":["https://openalex.org/W1840154465","https://openalex.org/W2293456502","https://openalex.org/W2030487085","https://openalex.org/W2403872937","https://openalex.org/W1994041352","https://openalex.org/W2065885317","https://openalex.org/W4231597420","https://openalex.org/W2397227848","https://openalex.org/W3140870753","https://openalex.org/W2957234659"],"abstract_inverted_index":{"Modern":[0],"Standard":[1],"Arabic,":[2],"as":[3,5,67,69,200],"well":[4],"Arabic":[6,35,63,86],"dialect":[7],"languages,":[8],"are":[9],"usually":[10],"written":[11],"without":[12,36,46,99],"diacritics.":[13,97,224],"The":[14,237],"absence":[15],"of":[16,27,41,61,76,85,108,112,172,223,242],"these":[17,28],"marks":[18],"constitute":[19],"a":[20,44,89,101,130,191,197,201],"real":[21],"problem":[22,90],"in":[23],"the":[24,74,82,116,119,123,138,154,164,169,212,221,231,234,247],"automatic":[25,170,243],"processing":[26],"data":[29],"by":[30],"NLP":[31],"tools.":[32],"Indeed,":[33],"writing":[34],"diacritics":[37,100],"introduces":[38],"several":[39],"types":[40],"ambiguity.":[42],"First,":[43],"word":[45,64,102,232],"diacratics":[47],"could":[48,103],"have":[49,66,104,122,137],"many":[50,68,105],"possible":[51,106],"meanings":[52],"depending":[53,72],"on":[54,73,206,246],"their":[55],"diacritization.":[56],"Second,":[57],"undiacritized":[58],"surface":[59],"forms":[60],"an":[62],"might":[65,87],"200":[70],"readings":[71],"complexity":[75],"its":[77],"morphology":[78],"[12].":[79],"In":[80,159,211],"fact,":[81],"agglutination":[83],"property":[84],"produce":[88],"that":[91,121,136,167],"can":[92],"only":[93],"be":[94],"resolved":[95],"using":[96],"Third,":[98],"parts":[107],"speech":[109],"(POS)":[110],"instead":[111],"one.":[113],"This":[114],"is":[115,151],"case":[117],"with":[118],"words":[120,135],"same":[124,139],"spelling":[125,140],"and":[126,145,182,196,233,257],"POS":[127,143,217],"tag":[128],"but":[129,141],"different":[131,142],"lexical":[132,146],"sense,":[133],"or":[134],"tags":[144],"senses":[147],"[8].":[148],"Finally,":[149],"there":[150],"ambiguity":[152],"at":[153,229],"grammatical":[155],"level":[156],"(syntactic":[157],"ambiguity).":[158],"this":[160],"article,":[161],"we":[162,185,215],"propose":[163,186],"first":[165,177],"work":[166],"investigates":[168],"diacritization":[171,244],"Tunisian":[173],"Dialect":[174],"texts.":[175],"We":[176],"describe":[178],"our":[179],"annotation":[180],"guidelines":[181],"procedure.":[183],"Then,":[184],"two":[187],"major":[188],"models,":[189],"namely":[190],"statistical":[192],"machine":[193],"translation":[194],"(SMT)":[195],"discriminative":[198],"model":[199],"sequence":[202],"classification":[203],"task":[204],"based":[205,245],"Conditional":[207],"Random":[208],"Fields":[209],"(CRF).":[210],"second":[213],"approach,":[214],"integrate":[216],"features":[218],"to":[219],"influence":[220],"generation":[222],"Diacritics":[225],"restoration":[226],"was":[227],"performed":[228],"both":[230],"character":[235],"levels.":[236],"results":[238],"showed":[239],"high":[240],"scores":[241],"CRF":[248,256],"system":[249],"(Word":[250],"Error":[251],"Rate":[252],"(WER)":[253],"21.44%":[254],"for":[255,260],"WER":[258],"34.6%":[259],"SMT).":[261]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
