{"id":"https://openalex.org/W3005277345","doi":"https://doi.org/10.1145/3434235","title":"Arabic Diacritic Recovery Using a Feature-rich biLSTM Model","display_name":"Arabic Diacritic Recovery Using a Feature-rich biLSTM Model","publication_year":2021,"publication_date":"2021-03-31","ids":{"openalex":"https://openalex.org/W3005277345","doi":"https://doi.org/10.1145/3434235","mag":"3005277345"},"language":"en","primary_location":{"id":"doi:10.1145/3434235","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3434235","pdf_url":null,"source":{"id":"https://openalex.org/S4306421405","display_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","issn_l":"2375-4699","issn":["2375-4699","2375-4702"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","raw_type":"journal-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2002.01207","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5111452036","display_name":"Kareem Darwish","orcid":null},"institutions":[{"id":"https://openalex.org/I4210144839","display_name":"Hamad bin Khalifa University","ror":"https://ror.org/03eyq4y97","country_code":"QA","type":"education","lineage":["https://openalex.org/I4210144839"]}],"countries":["QA"],"is_corresponding":true,"raw_author_name":"Kareem Darwish","raw_affiliation_strings":["Qatar Computing Research Institute, Hamad Bin Khalifa University, Doha, Qatar","Qatar Computing Research Institute, Hamad Bin Khalifa University Doha, Qatar"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Qatar Computing Research Institute, Hamad Bin Khalifa University, Doha, Qatar","institution_ids":["https://openalex.org/I4210144839"]},{"raw_affiliation_string":"Qatar Computing Research Institute, Hamad Bin Khalifa University Doha, Qatar","institution_ids":["https://openalex.org/I4210144839"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047182137","display_name":"Ahmed Abdelal\u00ed","orcid":"https://orcid.org/0000-0002-4160-8181"},"institutions":[{"id":"https://openalex.org/I4210144839","display_name":"Hamad bin Khalifa University","ror":"https://ror.org/03eyq4y97","country_code":"QA","type":"education","lineage":["https://openalex.org/I4210144839"]}],"countries":["QA"],"is_corresponding":false,"raw_author_name":"Ahmed Abdelali","raw_affiliation_strings":["Qatar Computing Research Institute, Hamad Bin Khalifa University, Doha, Qatar","Qatar Computing Research Institute, Hamad Bin Khalifa University Doha, Qatar"],"raw_orcid":"https://orcid.org/0000-0002-4160-8181","affiliations":[{"raw_affiliation_string":"Qatar Computing Research Institute, Hamad Bin Khalifa University, Doha, Qatar","institution_ids":["https://openalex.org/I4210144839"]},{"raw_affiliation_string":"Qatar Computing Research Institute, Hamad Bin Khalifa University Doha, Qatar","institution_ids":["https://openalex.org/I4210144839"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102924560","display_name":"Hamdy Mubarak","orcid":"https://orcid.org/0000-0002-9051-6240"},"institutions":[{"id":"https://openalex.org/I4210144839","display_name":"Hamad bin Khalifa University","ror":"https://ror.org/03eyq4y97","country_code":"QA","type":"education","lineage":["https://openalex.org/I4210144839"]}],"countries":["QA"],"is_corresponding":false,"raw_author_name":"Hamdy Mubarak","raw_affiliation_strings":["Qatar Computing Research Institute, Hamad Bin Khalifa University, Doha, Qatar","Qatar Computing Research Institute, Hamad Bin Khalifa University Doha, Qatar"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Qatar Computing Research Institute, Hamad Bin Khalifa University, Doha, Qatar","institution_ids":["https://openalex.org/I4210144839"]},{"raw_affiliation_string":"Qatar Computing Research Institute, Hamad Bin Khalifa University Doha, Qatar","institution_ids":["https://openalex.org/I4210144839"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5033221342","display_name":"Mohamed Eldesouki","orcid":"https://orcid.org/0009-0009-6662-5883"},"institutions":[{"id":"https://openalex.org/I4210144839","display_name":"Hamad bin Khalifa University","ror":"https://ror.org/03eyq4y97","country_code":"QA","type":"education","lineage":["https://openalex.org/I4210144839"]}],"countries":["QA"],"is_corresponding":false,"raw_author_name":"Mohamed Eldesouki","raw_affiliation_strings":["Qatar Computing Research Institute, Hamad Bin Khalifa University, Doha, Qatar","Qatar Computing Research Institute, Hamad Bin Khalifa University Doha, Qatar"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Qatar Computing Research Institute, Hamad Bin Khalifa University, Doha, Qatar","institution_ids":["https://openalex.org/I4210144839"]},{"raw_affiliation_string":"Qatar Computing Research Institute, Hamad Bin Khalifa University Doha, Qatar","institution_ids":["https://openalex.org/I4210144839"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5111452036"],"corresponding_institution_ids":["https://openalex.org/I4210144839"],"apc_list":null,"apc_paid":null,"fwci":0.28,"has_fulltext":true,"cited_by_count":3,"citation_normalized_percentile":{"value":0.609362,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"20","issue":"2","first_page":"1","last_page":"18"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.7110949754714966},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6783350706100464},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6528522372245789},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.6417837142944336},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6378850340843201},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.6093711853027344},{"id":"https://openalex.org/keywords/arabic","display_name":"Arabic","score":0.6030833721160889},{"id":"https://openalex.org/keywords/modern-standard-arabic","display_name":"Modern Standard Arabic","score":0.5893644094467163},{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.5730916857719421},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5142528414726257},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.42655181884765625},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.3859390914440155},{"id":"https://openalex.org/keywords/philosophy","display_name":"Philosophy","score":0.059853821992874146}],"concepts":[{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.7110949754714966},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6783350706100464},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6528522372245789},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.6417837142944336},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6378850340843201},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.6093711853027344},{"id":"https://openalex.org/C96455323","wikidata":"https://www.wikidata.org/wiki/Q13955","display_name":"Arabic","level":2,"score":0.6030833721160889},{"id":"https://openalex.org/C2778243841","wikidata":"https://www.wikidata.org/wiki/Q56467","display_name":"Modern Standard Arabic","level":3,"score":0.5893644094467163},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.5730916857719421},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5142528414726257},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.42655181884765625},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.3859390914440155},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.059853821992874146}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1145/3434235","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3434235","pdf_url":null,"source":{"id":"https://openalex.org/S4306421405","display_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","issn_l":"2375-4699","issn":["2375-4699","2375-4702"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:2002.01207","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2002.01207","pdf_url":"https://arxiv.org/pdf/2002.01207","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"mag:3005277345","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/2002.01207","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.2002.01207","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2002.01207","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2002.01207","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2002.01207","pdf_url":"https://arxiv.org/pdf/2002.01207","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.75}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W3005277345.pdf"},"referenced_works_count":38,"referenced_works":["https://openalex.org/W1713284","https://openalex.org/W45643337","https://openalex.org/W55657863","https://openalex.org/W125610139","https://openalex.org/W195358446","https://openalex.org/W850051568","https://openalex.org/W1514518395","https://openalex.org/W1904365287","https://openalex.org/W2012804051","https://openalex.org/W2020079054","https://openalex.org/W2041349462","https://openalex.org/W2041375131","https://openalex.org/W2054727018","https://openalex.org/W2057622292","https://openalex.org/W2058316855","https://openalex.org/W2062764272","https://openalex.org/W2063116544","https://openalex.org/W2104912629","https://openalex.org/W2109613320","https://openalex.org/W2126784811","https://openalex.org/W2131774270","https://openalex.org/W2132283323","https://openalex.org/W2149995043","https://openalex.org/W2153186553","https://openalex.org/W2250751111","https://openalex.org/W2250816155","https://openalex.org/W2251199546","https://openalex.org/W2251715652","https://openalex.org/W2575598244","https://openalex.org/W2740272870","https://openalex.org/W2795877110","https://openalex.org/W2885738119","https://openalex.org/W2895875496","https://openalex.org/W2897369970","https://openalex.org/W2906891164","https://openalex.org/W2945888757","https://openalex.org/W2963500086","https://openalex.org/W3008803645"],"related_works":["https://openalex.org/W3154883498","https://openalex.org/W2945888757","https://openalex.org/W2075647637","https://openalex.org/W1574997886","https://openalex.org/W2968345023","https://openalex.org/W2957234659","https://openalex.org/W3210631460","https://openalex.org/W2997186977","https://openalex.org/W2056382745","https://openalex.org/W2065400286","https://openalex.org/W850051568","https://openalex.org/W2546880863","https://openalex.org/W2767595811","https://openalex.org/W125610139","https://openalex.org/W2994169692","https://openalex.org/W2740272870","https://openalex.org/W791198478","https://openalex.org/W2012542894","https://openalex.org/W3120157709","https://openalex.org/W2899597854"],"abstract_inverted_index":{"Diacritics":[0],"(short":[1],"vowels)":[2],"are":[3,21,29,41,74,158],"typically":[4,46],"omitted":[5],"when":[6],"writing":[7],"Arabic":[8,25,131,143],"text,":[9],"and":[10,38,54,93,102,120,133,137,160,164],"readers":[11],"have":[12],"to":[13,16,70,96],"reintroduce":[14],"them":[15],"correctly":[17],"pronounce":[18],"words.":[19],"There":[20],"two":[22],"types":[23],"of":[24,51,91,118,126,135,139,171],"diacritics:":[26],"The":[27],"first":[28],"core-word":[30,67],"diacritics":[31,68,101],"(CW),":[32],"which":[33,45,73],"specify":[34,56],"the":[35,39,49,153,169],"lexical":[36],"selection,":[37],"second":[40],"case":[42,103,151],"endings":[43],"(CE),":[44],"appear":[47],"at":[48],"end":[50],"word":[52,100,148,155],"stems":[53],"generally":[55],"their":[57],"syntactic":[58],"roles.":[59],"Recovering":[60],"CEs":[61],"is":[62],"relatively":[63],"harder":[64],"than":[65],"recovering":[66],"due":[69],"inter-word":[71],"dependencies,":[72],"often":[75],"distant.":[76],"In":[77],"this":[78],"article,":[79],"we":[80],"use":[81,88],"feature-rich":[82],"recurrent":[83],"neural":[84,177],"network":[85],"model":[86,106],"that":[87],"a":[89,113,121],"variety":[90],"linguistic":[92],"surface-level":[94],"features":[95],"recover":[97],"both":[98],"core":[99],"endings.":[104],"Our":[105],"surpasses":[107],"all":[108],"previous":[109],"state-of-the-art":[110],"systems":[111],"with":[112,150],"CW":[114],"error":[115,123,156],"rate":[116,124],"(CWER)":[117],"2.9%":[119],"CE":[122],"(CEER)":[125],"3.7%":[127],"for":[128,141,162,174],"Modern":[129],"Standard":[130],"(MSA)":[132],"CWER":[134],"2.2%":[136],"CEER":[138],"2.5%":[140],"Classical":[142],"(CA).":[144],"When":[145],"combining":[146],"diacritized":[147],"cores":[149],"endings,":[152],"resultant":[154],"rates":[157],"6.0%":[159],"4.3%":[161],"MSA":[163],"CA,":[165],"respectively.":[166],"This":[167],"highlights":[168],"effectiveness":[170],"feature":[172],"engineering":[173],"such":[175],"deep":[176],"models.":[178]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
