{"id":"https://openalex.org/W4409772377","doi":"https://doi.org/10.1007/s00521-025-11145-1","title":"Transformers to the rescue: alleviating data scarcity in arabic grammatical error correction with pre-trained models","display_name":"Transformers to the rescue: alleviating data scarcity in arabic grammatical error correction with pre-trained models","publication_year":2025,"publication_date":"2025-04-24","ids":{"openalex":"https://openalex.org/W4409772377","doi":"https://doi.org/10.1007/s00521-025-11145-1"},"language":"en","primary_location":{"id":"doi:10.1007/s00521-025-11145-1","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s00521-025-11145-1","pdf_url":"https://link.springer.com/content/pdf/10.1007/s00521-025-11145-1.pdf","source":{"id":"https://openalex.org/S147897268","display_name":"Neural Computing and Applications","issn_l":"0941-0643","issn":["0941-0643","1433-3058"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Neural Computing and Applications","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/s00521-025-11145-1.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5040382641","display_name":"Karim Ismail","orcid":"https://orcid.org/0000-0003-4318-421X"},"institutions":[{"id":"https://openalex.org/I59272784","display_name":"Arab Academy for Science, Technology, and Maritime Transport","ror":"https://ror.org/0004vyj87","country_code":"EG","type":"education","lineage":["https://openalex.org/I59272784"]}],"countries":["EG"],"is_corresponding":false,"raw_author_name":"Karim Ismail","raw_affiliation_strings":["College of Computing and Information Technology, Arab Academy for Science Technology and Maritime Transport (AASTMT), Cairo, Egypt"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"College of Computing and Information Technology, Arab Academy for Science Technology and Maritime Transport (AASTMT), Cairo, Egypt","institution_ids":["https://openalex.org/I59272784"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048958429","display_name":"Sherif Abdou","orcid":"https://orcid.org/0000-0002-8338-856X"},"institutions":[{"id":"https://openalex.org/I145487455","display_name":"Cairo University","ror":"https://ror.org/03q21mh05","country_code":"EG","type":"education","lineage":["https://openalex.org/I145487455"]}],"countries":["EG"],"is_corresponding":false,"raw_author_name":"Sherif Abdou","raw_affiliation_strings":["Faculty of Computers and Artificial Intelligence, Cairo University, Giza, Egypt"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Faculty of Computers and Artificial Intelligence, Cairo University, Giza, Egypt","institution_ids":["https://openalex.org/I145487455"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101702597","display_name":"Mohamed Farouk","orcid":"https://orcid.org/0000-0002-7015-6444"},"institutions":[{"id":"https://openalex.org/I4210086935","display_name":"Arab Academy for Science, Technology, and Maritime Transport","ror":null,"country_code":"EG","type":null,"lineage":["https://openalex.org/I4210086935"]}],"countries":["EG"],"is_corresponding":false,"raw_author_name":"Mohamed Farouk","raw_affiliation_strings":["College of Computing and Information Technology, Arab Academy for Science Technology and Maritime Transport (AASTMT), Alexandria, Egypt"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"College of Computing and Information Technology, Arab Academy for Science Technology and Maritime Transport (AASTMT), Alexandria, Egypt","institution_ids":["https://openalex.org/I4210086935"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5103045221","display_name":"Ahmed Salem","orcid":"https://orcid.org/0000-0002-0456-2276"},"institutions":[{"id":"https://openalex.org/I59272784","display_name":"Arab Academy for Science, Technology, and Maritime Transport","ror":"https://ror.org/0004vyj87","country_code":"EG","type":"education","lineage":["https://openalex.org/I59272784"]}],"countries":["EG"],"is_corresponding":true,"raw_author_name":"Ahmed Salem","raw_affiliation_strings":["College of Computing and Information Technology, Arab Academy for Science Technology and Maritime Transport (AASTMT), Cairo, Egypt"],"raw_orcid":"https://orcid.org/0000-0002-0456-2276","affiliations":[{"raw_affiliation_string":"College of Computing and Information Technology, Arab Academy for Science Technology and Maritime Transport (AASTMT), Cairo, Egypt","institution_ids":["https://openalex.org/I59272784"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5103045221"],"corresponding_institution_ids":["https://openalex.org/I59272784"],"apc_list":{"value":2390,"currency":"EUR","value_usd":2990},"apc_paid":{"value":2390,"currency":"EUR","value_usd":2990},"fwci":10.3871,"has_fulltext":true,"cited_by_count":6,"citation_normalized_percentile":{"value":0.97816263,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":"37","issue":"18","first_page":"13011","last_page":"13038"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.989300012588501,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/arabic","display_name":"Arabic","score":0.6971749663352966},{"id":"https://openalex.org/keywords/computational-science-and-engineering","display_name":"Computational Science and Engineering","score":0.6363096237182617},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6223618388175964},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5631445050239563},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.48268890380859375},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.47366079688072205},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3407915234565735},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.32810357213020325},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.23070695996284485},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.10672315955162048},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.08748027682304382}],"concepts":[{"id":"https://openalex.org/C96455323","wikidata":"https://www.wikidata.org/wiki/Q13955","display_name":"Arabic","level":2,"score":0.6971749663352966},{"id":"https://openalex.org/C68597687","wikidata":"https://www.wikidata.org/wiki/Q362601","display_name":"Computational Science and Engineering","level":2,"score":0.6363096237182617},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6223618388175964},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5631445050239563},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48268890380859375},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.47366079688072205},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3407915234565735},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.32810357213020325},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.23070695996284485},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.10672315955162048},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.08748027682304382},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1007/s00521-025-11145-1","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s00521-025-11145-1","pdf_url":"https://link.springer.com/content/pdf/10.1007/s00521-025-11145-1.pdf","source":{"id":"https://openalex.org/S147897268","display_name":"Neural Computing and Applications","issn_l":"0941-0643","issn":["0941-0643","1433-3058"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Neural Computing and Applications","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1007/s00521-025-11145-1","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s00521-025-11145-1","pdf_url":"https://link.springer.com/content/pdf/10.1007/s00521-025-11145-1.pdf","source":{"id":"https://openalex.org/S147897268","display_name":"Neural Computing and Applications","issn_l":"0941-0643","issn":["0941-0643","1433-3058"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Neural Computing and Applications","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320313982","display_name":"Arab Academy for Science, Technology and Maritime Transport","ror":"https://ror.org/0004vyj87"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4409772377.pdf","grobid_xml":"https://content.openalex.org/works/W4409772377.grobid-xml"},"referenced_works_count":49,"referenced_works":["https://openalex.org/W2250539671","https://openalex.org/W2617241599","https://openalex.org/W2832139998","https://openalex.org/W2892341857","https://openalex.org/W2896457183","https://openalex.org/W2911656629","https://openalex.org/W2911658506","https://openalex.org/W2938704169","https://openalex.org/W2947411064","https://openalex.org/W2968297680","https://openalex.org/W2999089077","https://openalex.org/W3001434439","https://openalex.org/W3008110149","https://openalex.org/W3018086583","https://openalex.org/W3023622314","https://openalex.org/W3024508854","https://openalex.org/W3026230075","https://openalex.org/W3031696893","https://openalex.org/W3041181542","https://openalex.org/W3046878418","https://openalex.org/W3092288641","https://openalex.org/W3093517588","https://openalex.org/W3095319910","https://openalex.org/W3116890009","https://openalex.org/W3121045579","https://openalex.org/W3129150706","https://openalex.org/W3136021864","https://openalex.org/W3153202096","https://openalex.org/W3176923149","https://openalex.org/W3194005300","https://openalex.org/W4200338164","https://openalex.org/W4205163778","https://openalex.org/W4210352519","https://openalex.org/W4212808971","https://openalex.org/W4221140906","https://openalex.org/W4224247818","https://openalex.org/W4281693709","https://openalex.org/W4281740565","https://openalex.org/W4287029423","https://openalex.org/W4288088047","https://openalex.org/W4297734170","https://openalex.org/W4309083387","https://openalex.org/W4309195668","https://openalex.org/W4312211453","https://openalex.org/W4378473838","https://openalex.org/W4379015399","https://openalex.org/W4381461529","https://openalex.org/W4386011557","https://openalex.org/W4389814066"],"related_works":["https://openalex.org/W4249048193","https://openalex.org/W3000360420","https://openalex.org/W1931440630","https://openalex.org/W4393232657","https://openalex.org/W3172144307","https://openalex.org/W4390638272","https://openalex.org/W4396855230","https://openalex.org/W3184247917","https://openalex.org/W2566442783","https://openalex.org/W4235657630"],"abstract_inverted_index":{"Abstract":[0],"Grammatical":[1],"error":[2],"correction":[3],"(GEC)":[4],"in":[5,76,143,148,200,207],"Arabic":[6,61,149,161],"presents":[7],"unique":[8],"challenges":[9,180],"arising":[10],"from":[11],"complex":[12],"morphology":[13],"and":[14,50,57,86,130],"contextual":[15],"intricacies.":[16],"Current":[17],"methodologies":[18],"predominantly":[19],"rely":[20],"on":[21,89,117,123,132],"neural":[22],"machine":[23],"translation":[24],"(NMT)":[25],"models,":[26,46],"hindered":[27],"by":[28,163,182],"adequately":[29],"annotated":[30],"training":[31],"data":[32,81,184],"scarcity.":[33],"This":[34,135,193],"research":[35,194],"introduces":[36],"a":[37,99,190],"novel":[38],"approach":[39,177],"utilizing":[40],"pre-trained":[41,168],"transformers,":[42,64],"specifically":[43],"sequence-to-sequence":[44],"(seq2seq)":[45],"such":[47],"as":[48,189],"AraT5":[49],"AraBART,":[51],"alongside":[52],"their":[53],"multilingual":[54],"variants":[55],"(mT5":[56],"mBART),":[58],"to":[59,156,178],"address":[60],"GEC.":[62],"These":[63],"initially":[65],"designed":[66],"for":[67,197],"diverse":[68],"natural":[69],"language":[70],"processing":[71],"tasks,":[72],"demonstrate":[73],"promising":[74],"results":[75,104],"GEC,":[77],"particularly":[78],"when":[79],"parallel":[80],"are":[82],"limited.":[83],"Employing":[84],"tokenization":[85],"preprocessing":[87],"techniques":[88],"publicly":[90],"accessible":[91],"GEC":[92,162],"datasets,":[93],"we":[94],"train":[95],"the":[96,118,124,137,140,158,173],"transformers":[97],"using":[98],"supervised":[100],"approach.":[101],"The":[102,170],"experimental":[103],"showcase":[105],"superior":[106],"performance,":[107],"surpassing":[108],"previous":[109],"models":[110],"with":[111,167,186],"an":[112],"F1":[113],"score":[114],"of":[115,139,160,175],"92.1%":[116],"QALB":[119,125],"2014":[120],"dataset,":[121],"89.4%":[122],"2015":[126],"native":[127],"test":[128],"data,":[129],"83.6%":[131],"non-native":[133],"data.":[134],"highlights":[136],"effectiveness":[138],"proposed":[141],"methodology":[142],"rectifying":[144],"various":[145],"grammatical":[146],"errors":[147],"text.":[150],"In":[151],"conclusion,":[152],"this":[153,176],"study":[154],"contributes":[155],"advancing":[157],"field":[159],"leveraging":[164],"transfer":[165],"learning":[166],"transformers.":[169],"findings":[171],"underscore":[172],"potential":[174,205],"overcome":[179],"posed":[181],"limited":[183],"availability,":[185],"AraBART":[187],"emerging":[188],"practical":[191],"choice.":[192],"opens":[195],"avenues":[196],"further":[198],"exploration":[199],"low-resource":[201],"languages.":[202],"It":[203],"suggests":[204],"applications":[206],"high-resource":[208],"languages,":[209],"encouraging":[210],"future":[211],"comparative":[212],"studies.":[213]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":2}],"updated_date":"2026-06-13T06:13:01.061226","created_date":"2025-10-10T00:00:00"}
