{"id":"https://openalex.org/W2970101452","doi":"https://doi.org/10.18653/v1/w19-4602","title":"Morphology-aware Word-Segmentation in Dialectal Arabic Adaptation of Neural Machine Translation","display_name":"Morphology-aware Word-Segmentation in Dialectal Arabic Adaptation of Neural Machine Translation","publication_year":2019,"publication_date":"2019-01-01","ids":{"openalex":"https://openalex.org/W2970101452","doi":"https://doi.org/10.18653/v1/w19-4602","mag":"2970101452"},"language":"en","primary_location":{"id":"doi:10.18653/v1/w19-4602","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w19-4602","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Fourth Arabic Natural Language Processing Workshop","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.18653/v1/w19-4602","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5048873489","display_name":"Ahmed Y. Tawfik","orcid":"https://orcid.org/0000-0003-3561-3248"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Ahmed Tawfik","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018792271","display_name":"Mahitab Emam","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mahitab Emam","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025845905","display_name":"Khaled Essam","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Khaled Essam","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070450898","display_name":"Robert Nabil","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Robert Nabil","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5109522253","display_name":"Hany Hassan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hany Hassan","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5048873489"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.1201,"has_fulltext":false,"cited_by_count":12,"citation_normalized_percentile":{"value":0.83871278,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"11","last_page":"17"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.9686999917030334,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7622960805892944},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6643921136856079},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6573094725608826},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.5899408459663391},{"id":"https://openalex.org/keywords/arabic","display_name":"Arabic","score":0.5726723670959473},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.5658276081085205},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.5425662398338318},{"id":"https://openalex.org/keywords/text-segmentation","display_name":"Text segmentation","score":0.5302400588989258},{"id":"https://openalex.org/keywords/modern-standard-arabic","display_name":"Modern Standard Arabic","score":0.48457610607147217},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.34783345460891724},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.2810869812965393}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7622960805892944},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6643921136856079},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6573094725608826},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.5899408459663391},{"id":"https://openalex.org/C96455323","wikidata":"https://www.wikidata.org/wiki/Q13955","display_name":"Arabic","level":2,"score":0.5726723670959473},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.5658276081085205},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.5425662398338318},{"id":"https://openalex.org/C98501671","wikidata":"https://www.wikidata.org/wiki/Q1948408","display_name":"Text segmentation","level":3,"score":0.5302400588989258},{"id":"https://openalex.org/C2778243841","wikidata":"https://www.wikidata.org/wiki/Q56467","display_name":"Modern Standard Arabic","level":3,"score":0.48457610607147217},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.34783345460891724},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.2810869812965393},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/w19-4602","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w19-4602","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Fourth Arabic Natural Language Processing Workshop","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/w19-4602","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w19-4602","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Fourth Arabic Natural Language Processing Workshop","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W46679369","https://openalex.org/W137989762","https://openalex.org/W2097341304","https://openalex.org/W2156554947","https://openalex.org/W2567571499","https://openalex.org/W2726843108","https://openalex.org/W2740130688","https://openalex.org/W2754166059","https://openalex.org/W2757041753","https://openalex.org/W2902441267","https://openalex.org/W2962784628","https://openalex.org/W2963403868","https://openalex.org/W2963626623","https://openalex.org/W2963979492","https://openalex.org/W2963993537","https://openalex.org/W2964308564","https://openalex.org/W2993173751","https://openalex.org/W3170253630","https://openalex.org/W3203149905","https://openalex.org/W3204165160"],"related_works":["https://openalex.org/W4392318734","https://openalex.org/W2620283452","https://openalex.org/W4322750817","https://openalex.org/W4400896925","https://openalex.org/W2801712537","https://openalex.org/W4396221470","https://openalex.org/W587642979","https://openalex.org/W2894059694","https://openalex.org/W4299493485","https://openalex.org/W589191641"],"abstract_inverted_index":{"Parallel":[0],"corpora":[1],"available":[2],"for":[3,9],"building":[4],"machine":[5],"translation":[6],"(MT)":[7],"models":[8],"dialectal":[10,35,38],"Arabic":[11,27,74,77,81],"(DA)":[12],"are":[13],"rather":[14],"limited.":[15],"The":[16],"scarcity":[17],"of":[18,24,69],"resources":[19,30],"has":[20],"prompted":[21],"the":[22,33,96],"use":[23],"Modern":[25],"Standard":[26],"(MSA)":[28],"abundant":[29],"to":[31,53],"complement":[32],"limited":[34],"resource.":[36],"However,":[37],"clitics":[39],"often":[40],"differ":[41],"between":[42],"MSA":[43],"and":[44,63,79],"DA.":[45],"This":[46],"paper":[47],"compares":[48],"morphology-aware":[49,88],"DA":[50],"word":[51,55,98],"segmentation":[52,56,89,99],"other":[54,97],"approaches":[57],"like":[58],"Byte":[59],"Pair":[60],"Encoding":[61],"(BPE)":[62],"Sub-word":[64],"Regularization":[65],"(SR).":[66],"A":[67],"set":[68],"experiments":[70],"conducted":[71],"on":[72],"Egyptian":[73],"(EA),":[75],"Levantine":[76],"(LA),":[78],"Gulf":[80],"(GA)":[82],"show":[83],"that":[84],"a":[85],"sufficiently":[86],"accurate":[87],"used":[90],"in":[91],"conjunction":[92],"with":[93],"BPE":[94],"outperforms":[95],"approaches.":[100]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
