{"id":"https://openalex.org/W4414931019","doi":"https://doi.org/10.1145/3750720.3757282","title":"Data Readiness for Scientific AI at Scale","display_name":"Data Readiness for Scientific AI at Scale","publication_year":2025,"publication_date":"2025-09-08","ids":{"openalex":"https://openalex.org/W4414931019","doi":"https://doi.org/10.1145/3750720.3757282"},"language":"en","primary_location":{"id":"doi:10.1145/3750720.3757282","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3750720.3757282","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Workshop Proceedings of the 54th International Conference on Parallel Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3750720.3757282","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5018102608","display_name":"Wesley Brewer","orcid":"https://orcid.org/0000-0002-3639-3956"},"institutions":[{"id":"https://openalex.org/I1289243028","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56","country_code":"US","type":"facility","lineage":["https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Wesley Brewer","raw_affiliation_strings":["Oak Ridge National Laboratory (ORNL), Oak Ridge, USA"],"raw_orcid":"https://orcid.org/0000-0002-3639-3956","affiliations":[{"raw_affiliation_string":"Oak Ridge National Laboratory (ORNL), Oak Ridge, USA","institution_ids":["https://openalex.org/I1289243028"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005946388","display_name":"Patrick Widener","orcid":"https://orcid.org/0000-0002-5882-0816"},"institutions":[{"id":"https://openalex.org/I1289243028","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56","country_code":"US","type":"facility","lineage":["https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Patrick Widener","raw_affiliation_strings":["Oak Ridge National Laboratory (ORNL), Oak Ridge, USA"],"raw_orcid":"https://orcid.org/0000-0002-5882-0816","affiliations":[{"raw_affiliation_string":"Oak Ridge National Laboratory (ORNL), Oak Ridge, USA","institution_ids":["https://openalex.org/I1289243028"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026632303","display_name":"Valentine Anantharaj","orcid":"https://orcid.org/0000-0002-9356-1311"},"institutions":[{"id":"https://openalex.org/I1289243028","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56","country_code":"US","type":"facility","lineage":["https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Valentine Anantharaj","raw_affiliation_strings":["Oak Ridge National Laboratory (ORNL), Oak Ridge, USA"],"raw_orcid":"https://orcid.org/0000-0002-9356-1311","affiliations":[{"raw_affiliation_string":"Oak Ridge National Laboratory (ORNL), Oak Ridge, USA","institution_ids":["https://openalex.org/I1289243028"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101916963","display_name":"Feiyi Wang","orcid":"https://orcid.org/0000-0002-0099-1559"},"institutions":[{"id":"https://openalex.org/I1289243028","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56","country_code":"US","type":"facility","lineage":["https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Feiyi Wang","raw_affiliation_strings":["Oak Ridge National Laboratory (ORNL), Oak Ridge, USA"],"raw_orcid":"https://orcid.org/0000-0002-0099-1559","affiliations":[{"raw_affiliation_string":"Oak Ridge National Laboratory (ORNL), Oak Ridge, USA","institution_ids":["https://openalex.org/I1289243028"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016930081","display_name":"Thomas L. Beck","orcid":"https://orcid.org/0000-0001-8973-7145"},"institutions":[{"id":"https://openalex.org/I1289243028","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56","country_code":"US","type":"facility","lineage":["https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tom Beck","raw_affiliation_strings":["Oak Ridge National Laboratory (ORNL), Oak Ridge, USA"],"raw_orcid":"https://orcid.org/0000-0001-8973-7145","affiliations":[{"raw_affiliation_string":"Oak Ridge National Laboratory (ORNL), Oak Ridge, USA","institution_ids":["https://openalex.org/I1289243028"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014960590","display_name":"Arjun Shankar","orcid":null},"institutions":[{"id":"https://openalex.org/I1289243028","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56","country_code":"US","type":"facility","lineage":["https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Arjun Shankar","raw_affiliation_strings":["Oak Ridge National Laboratory (ORNL), Oak Ridge, USA"],"raw_orcid":"https://orcid.org/0000-0001-5289-7460","affiliations":[{"raw_affiliation_string":"Oak Ridge National Laboratory (ORNL), Oak Ridge, USA","institution_ids":["https://openalex.org/I1289243028"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5014538652","display_name":"Sarp Oral","orcid":"https://orcid.org/0000-0001-8745-7078"},"institutions":[{"id":"https://openalex.org/I1289243028","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56","country_code":"US","type":"facility","lineage":["https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sarp Oral","raw_affiliation_strings":["Oak Ridge National Laboratory (ORNL), Oak Ridge, USA"],"raw_orcid":"https://orcid.org/0000-0001-8745-7078","affiliations":[{"raw_affiliation_string":"Oak Ridge National Laboratory (ORNL), Oak Ridge, USA","institution_ids":["https://openalex.org/I1289243028"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5018102608"],"corresponding_institution_ids":["https://openalex.org/I1289243028"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.44111616,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"18","last_page":"24"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11891","display_name":"Big Data and Business Intelligence","score":0.9602000117301941,"subfield":{"id":"https://openalex.org/subfields/1404","display_name":"Management Information Systems"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11891","display_name":"Big Data and Business Intelligence","score":0.9602000117301941,"subfield":{"id":"https://openalex.org/subfields/1404","display_name":"Management Information Systems"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.6830999851226807},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.5123000144958496},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.47620001435279846},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.4564000070095062},{"id":"https://openalex.org/keywords/preprocessor","display_name":"Preprocessor","score":0.45419999957084656},{"id":"https://openalex.org/keywords/data-processing","display_name":"Data processing","score":0.413100004196167},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.3813999891281128},{"id":"https://openalex.org/keywords/maturity","display_name":"Maturity (psychological)","score":0.3547999858856201}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7002999782562256},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.6830999851226807},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.6273999810218811},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.5123000144958496},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.47620001435279846},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.4564000070095062},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.45419999957084656},{"id":"https://openalex.org/C138827492","wikidata":"https://www.wikidata.org/wiki/Q6661985","display_name":"Data processing","level":2,"score":0.413100004196167},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.3813999891281128},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3619000017642975},{"id":"https://openalex.org/C101433766","wikidata":"https://www.wikidata.org/wiki/Q3543263","display_name":"Maturity (psychological)","level":2,"score":0.3547999858856201},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.34279999136924744},{"id":"https://openalex.org/C10551718","wikidata":"https://www.wikidata.org/wiki/Q5227332","display_name":"Data pre-processing","level":2,"score":0.3361000120639801},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3222000002861023},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.29989999532699585},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.29319998621940613},{"id":"https://openalex.org/C2778012447","wikidata":"https://www.wikidata.org/wiki/Q1034415","display_name":"Scope (computer science)","level":2,"score":0.2851000130176544},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.2759000062942505},{"id":"https://openalex.org/C48677424","wikidata":"https://www.wikidata.org/wiki/Q6888088","display_name":"Mode (computer interface)","level":2,"score":0.2741999924182892},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.2720000147819519},{"id":"https://openalex.org/C2776397876","wikidata":"https://www.wikidata.org/wiki/Q1450531","display_name":"Cyberinfrastructure","level":2,"score":0.2696000039577484},{"id":"https://openalex.org/C138958017","wikidata":"https://www.wikidata.org/wiki/Q190087","display_name":"Data type","level":2,"score":0.2596000134944916}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3750720.3757282","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3750720.3757282","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Workshop Proceedings of the 54th International Conference on Parallel Processing","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2507.23018","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2507.23018","pdf_url":"https://arxiv.org/pdf/2507.23018","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"doi:10.1145/3750720.3757282","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3750720.3757282","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Workshop Proceedings of the 54th International Conference on Parallel Processing","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"This":[0,69],"paper":[1],"examines":[2],"how":[3],"Data":[4,48,55],"Readiness":[5,49],"for":[6,78,107,112],"AI":[7,80,111],"(DRAI)":[8],"principles":[9],"apply":[10],"to":[11,16,52,59,63],"leadership-scale":[12],"scientific":[13,76,96],"datasets":[14],"used":[15],"train":[17],"foundation":[18],"models.":[19,85],"We":[20,40],"analyze":[21],"archetypal":[22],"workflows":[23],"across":[24],"four":[25],"representative":[26],"domains\u2014climate,":[27],"nuclear":[28],"fusion,":[29],"bio/health,":[30],"and":[31,37,54,99,109],"materials\u2014to":[32],"identify":[33],"common":[34],"preprocessing":[35],"patterns":[36],"domain-specific":[38],"constraints.":[39],"introduce":[41],"a":[42,90],"two-dimensional":[43],"readiness":[44,98],"framework":[45,70],"composed":[46],"of":[47],"Levels":[50],"(raw":[51],"AI-ready)":[53],"Processing":[56],"Stages":[57],"(ingest":[58],"shard),":[60],"both":[61],"tailored":[62],"high":[64],"performance":[65],"computing":[66],"(HPC)":[67],"environments.":[68],"outlines":[71],"key":[72],"challenges":[73],"in":[74],"transforming":[75],"data":[77,97],"scalable":[79,108],"training,":[81],"emphasizing":[82],"transformer-based":[83],"generative":[84],"Together,":[86],"these":[87],"dimensions":[88],"form":[89],"conceptual":[91],"maturity":[92],"matrix":[93],"that":[94],"characterizes":[95],"guides":[100],"infrastructure":[101],"development":[102],"toward":[103],"standardized,":[104],"cross-domain":[105],"support":[106],"reproducible":[110],"science.":[113]},"counts_by_year":[],"updated_date":"2025-12-21T23:12:01.093139","created_date":"2025-10-10T00:00:00"}
