{"id":"https://openalex.org/W7148556838","doi":"https://doi.org/10.48550/arxiv.2604.00015","title":"ASCAT: An Arabic Scientific Corpus and Benchmark for Advanced Translation Evaluation","display_name":"ASCAT: An Arabic Scientific Corpus and Benchmark for Advanced Translation Evaluation","publication_year":2026,"publication_date":"2026-03-10","ids":{"openalex":"https://openalex.org/W7148556838","doi":"https://doi.org/10.48550/arxiv.2604.00015"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.00015","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.00015","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.00015","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5093081405","display_name":"Serry Sibaee","orcid":"https://orcid.org/0009-0009-5649-4111"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Sibaee, Serry","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088312471","display_name":"Khloud Al Jallad","orcid":"https://orcid.org/0000-0001-9474-9204"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jallad, Khloud Al","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132830347","display_name":"Zineb Yousfi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yousfi, Zineb","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132794825","display_name":"Israa Elsayed Elhosiny","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Elhosiny, Israa Elsayed","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107377572","display_name":"Yousra El-Ghawi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"El-Ghawi, Yousra","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120309428","display_name":"Batool Najeh Balah","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Balah, Batool","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5093933812","display_name":"Omer Nacar","orcid":"https://orcid.org/0000-0001-7493-9318"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nacar, Omer","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5093081405"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.5608999729156494,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.5608999729156494,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.1151999980211258,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.04650000110268593,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.715399980545044},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.5728999972343445},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.531000018119812},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.5202000141143799},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.5023999810218811},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.43970000743865967},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.43700000643730164},{"id":"https://openalex.org/keywords/translation","display_name":"Translation (biology)","score":0.43630000948905945}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7825000286102295},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.7444999814033508},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.715399980545044},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7017999887466431},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.5728999972343445},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.531000018119812},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.5202000141143799},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.5023999810218811},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.43970000743865967},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.43700000643730164},{"id":"https://openalex.org/C149364088","wikidata":"https://www.wikidata.org/wiki/Q185917","display_name":"Translation (biology)","level":4,"score":0.43630000948905945},{"id":"https://openalex.org/C96455323","wikidata":"https://www.wikidata.org/wiki/Q13955","display_name":"Arabic","level":2,"score":0.3781999945640564},{"id":"https://openalex.org/C2474386","wikidata":"https://www.wikidata.org/wiki/Q461183","display_name":"Text corpus","level":2,"score":0.3621000051498413},{"id":"https://openalex.org/C155092808","wikidata":"https://www.wikidata.org/wiki/Q182557","display_name":"Computational linguistics","level":2,"score":0.34060001373291016},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.3398999869823456},{"id":"https://openalex.org/C532629269","wikidata":"https://www.wikidata.org/wiki/Q865083","display_name":"Corpus linguistics","level":2,"score":0.3357999920845032},{"id":"https://openalex.org/C80023036","wikidata":"https://www.wikidata.org/wiki/Q5147531","display_name":"Collocation (remote sensing)","level":2,"score":0.323199987411499},{"id":"https://openalex.org/C2778143727","wikidata":"https://www.wikidata.org/wiki/Q1820650","display_name":"Readability","level":2,"score":0.3149000108242035},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.3091999888420105},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.2874000072479248},{"id":"https://openalex.org/C53893814","wikidata":"https://www.wikidata.org/wiki/Q7378909","display_name":"Rule-based machine translation","level":2,"score":0.2809999883174896},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.26840001344680786},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.2597000002861023},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.2549000084400177}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.00015","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.00015","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.00015","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.00015","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10","score":0.5428781509399414}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0,131],"present":[1],"ASCAT":[2,42,157],"(Arabic":[3],"Scientific":[4],"Corpus":[5],"for":[6,16,166],"Advanced":[7],"Translation),":[8],"a":[9,22,159],"high-quality":[10],"English-Arabic":[11],"parallel":[12],"benchmark":[13,132],"corpus":[14,107,138],"designed":[15,170],"scientific":[17,45,58,163,176],"translation":[18,25,177,183],"evaluation":[19,155,174],"constructed":[20],"through":[21],"systematic":[23],"multi-engine":[24],"and":[26,51,66,85,92,102,112,145,168,179],"human":[27],"validation":[28],"pipeline.":[29],"Unlike":[30],"existing":[31],"Arabic-English":[32],"corpora":[33],"that":[34],"rely":[35],"on":[36,136],"short":[37],"sentences":[38],"or":[39],"single-domain":[40],"text,":[41],"targets":[43],"full":[44],"abstracts":[46],"averaging":[47],"141.7":[48],"words":[49,53,123],"(English)":[50],"111.78":[52],"(Arabic),":[54],"drawn":[55],"from":[56],"five":[57],"domains:":[59],"physics,":[60],"mathematics,":[61],"computer":[62],"science,":[63],"quantum":[64],"mechanics,":[65],"artificial":[67],"intelligence.":[68],"Each":[69],"abstract":[70],"was":[71],"translated":[72],"using":[73],"three":[74,133],"complementary":[75],"architectures":[76],"generative":[77],"AI":[78],"(Gemini),":[79],"transformer-based":[80],"models":[81],"(Hugging":[82],"Face":[83],"\\texttt{quickmt-en-ar}),":[84],"commercial":[86],"MT":[87,164],"APIs":[88],"(Google":[89],"Translate,":[90],"DeepL)":[91],"subsequently":[93],"validated":[94],"by":[95],"domain":[96],"experts":[97],"at":[98],"the":[99,125,129,137],"lexical,":[100],"syntactic,":[101],"semantic":[103],"levels.":[104],"The":[105],"resulting":[106],"contains":[108],"67,293":[109],"English":[110],"tokens":[111],"60,026":[113],"Arabic":[114,118,167],"tokens,":[115],"with":[116],"an":[117,154],"vocabulary":[119],"of":[120,128,175,181],"17,604":[121],"unique":[122],"reflecting":[124],"morphological":[126],"richness":[127],"language.":[130],"state-of-the-art":[134],"LLMs":[135],"GPT-4o-mini":[139],"(BLEU:":[140,143,147],"37.07),":[141],"Gemini-3.0-Flash-Preview":[142],"30.44),":[144],"Qwen3-235B-A22B":[146],"23.68)":[148],"demonstrating":[149],"its":[150],"discriminative":[151],"power":[152],"as":[153],"benchmark.":[156],"addresses":[158],"critical":[160],"gap":[161],"in":[162],"resources":[165],"is":[169],"to":[171],"support":[172],"rigorous":[173],"quality":[178],"training":[180],"domain-specific":[182],"models.":[184]},"counts_by_year":[],"updated_date":"2026-04-03T16:44:17.987007","created_date":"2026-04-03T00:00:00"}
