{"id":"https://openalex.org/W6967591390","doi":"https://doi.org/10.5281/zenodo.11467483","title":"F-DATA: A Fugaku Workload Dataset for Job-centric Predictive Modelling in HPC Systems","display_name":"F-DATA: A Fugaku Workload Dataset for Job-centric Predictive Modelling in HPC Systems","publication_year":2024,"publication_date":"2024-06-05","ids":{"openalex":"https://openalex.org/W6967591390","doi":"https://doi.org/10.5281/zenodo.11467483"},"language":"en","primary_location":{"id":"pmh:oai:cris.unibo.it:11585/1013491","is_oa":true,"landing_page_url":"https://zenodo.org/records/11467483","pdf_url":null,"source":{"id":"https://openalex.org/S4306402579","display_name":"Archivio istituzionale della ricerca (Alma Mater Studiorum Universit\u00e0 di Bologna)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210117483","host_organization_name":"Istituto di Ematologia di Bologna","host_organization_lineage":["https://openalex.org/I4210117483"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/other"},"type":"dataset","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://zenodo.org/records/11467483","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Antici, Francesco","orcid":"https://orcid.org/0000-0002-1125-0588"},"institutions":[{"id":"https://openalex.org/I9360294","display_name":"University of Bologna","ror":"https://ror.org/01111rn36","country_code":"IT","type":"education","lineage":["https://openalex.org/I9360294"]}],"countries":["IT"],"is_corresponding":true,"raw_author_name":"Antici, Francesco","raw_affiliation_strings":["University of Bologna"],"affiliations":[{"raw_affiliation_string":"University of Bologna","institution_ids":["https://openalex.org/I9360294"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Bartolini, Andrea","orcid":"https://orcid.org/0000-0002-1148-2450"},"institutions":[{"id":"https://openalex.org/I9360294","display_name":"University of Bologna","ror":"https://ror.org/01111rn36","country_code":"IT","type":"education","lineage":["https://openalex.org/I9360294"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Bartolini, Andrea","raw_affiliation_strings":["University of Bologna"],"affiliations":[{"raw_affiliation_string":"University of Bologna","institution_ids":["https://openalex.org/I9360294"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Domke, Jens","orcid":"https://orcid.org/0000-0002-5343-414X"},"institutions":[{"id":"https://openalex.org/I4210129730","display_name":"RIKEN Center for Computational Science","ror":"https://ror.org/03r519674","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210110652","https://openalex.org/I4210129730"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Domke, Jens","raw_affiliation_strings":["RIKEN Center for Computational Science"],"affiliations":[{"raw_affiliation_string":"RIKEN Center for Computational Science","institution_ids":["https://openalex.org/I4210129730"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Kiziltan, Zeynep","orcid":"https://orcid.org/0000-0003-0412-4396"},"institutions":[{"id":"https://openalex.org/I9360294","display_name":"University of Bologna","ror":"https://ror.org/01111rn36","country_code":"IT","type":"education","lineage":["https://openalex.org/I9360294"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Kiziltan, Zeynep","raw_affiliation_strings":["University of Bologna"],"affiliations":[{"raw_affiliation_string":"University of Bologna","institution_ids":["https://openalex.org/I9360294"]}]},{"author_position":"last","author":{"id":null,"display_name":"Yamamoto, Keiji","orcid":null},"institutions":[{"id":"https://openalex.org/I4210129730","display_name":"RIKEN Center for Computational Science","ror":"https://ror.org/03r519674","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210110652","https://openalex.org/I4210129730"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yamamoto, Keiji","raw_affiliation_strings":["RIKEN Center for Computational Science"],"affiliations":[{"raw_affiliation_string":"RIKEN Center for Computational Science","institution_ids":["https://openalex.org/I4210129730"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I9360294"],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":null,"topics":[],"keywords":[{"id":"https://openalex.org/keywords/workload","display_name":"Workload","score":0.75},{"id":"https://openalex.org/keywords/scripting-language","display_name":"Scripting language","score":0.5630000233650208},{"id":"https://openalex.org/keywords/python","display_name":"Python (programming language)","score":0.5267000198364258},{"id":"https://openalex.org/keywords/supercomputer","display_name":"Supercomputer","score":0.5027999877929688},{"id":"https://openalex.org/keywords/installation","display_name":"Installation","score":0.4530999958515167},{"id":"https://openalex.org/keywords/data-file","display_name":"Data file","score":0.4422000050544739},{"id":"https://openalex.org/keywords/file-system","display_name":"File system","score":0.4023999869823456},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.3555999994277954}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8220999836921692},{"id":"https://openalex.org/C2778476105","wikidata":"https://www.wikidata.org/wiki/Q628539","display_name":"Workload","level":2,"score":0.75},{"id":"https://openalex.org/C61423126","wikidata":"https://www.wikidata.org/wiki/Q187432","display_name":"Scripting language","level":2,"score":0.5630000233650208},{"id":"https://openalex.org/C519991488","wikidata":"https://www.wikidata.org/wiki/Q28865","display_name":"Python (programming language)","level":2,"score":0.5267000198364258},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.5027999877929688},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.4812999963760376},{"id":"https://openalex.org/C146778888","wikidata":"https://www.wikidata.org/wiki/Q836862","display_name":"Installation","level":2,"score":0.4530999958515167},{"id":"https://openalex.org/C171730128","wikidata":"https://www.wikidata.org/wiki/Q5227290","display_name":"Data file","level":2,"score":0.4422000050544739},{"id":"https://openalex.org/C2780940931","wikidata":"https://www.wikidata.org/wiki/Q174989","display_name":"File system","level":2,"score":0.4023999869823456},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.36340001225471497},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.3555999994277954},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.337799996137619},{"id":"https://openalex.org/C128595289","wikidata":"https://www.wikidata.org/wiki/Q131140","display_name":"COBOL","level":2,"score":0.33329999446868896},{"id":"https://openalex.org/C172658912","wikidata":"https://www.wikidata.org/wiki/Q661613","display_name":"Batch processing","level":2,"score":0.30630001425743103},{"id":"https://openalex.org/C95637964","wikidata":"https://www.wikidata.org/wiki/Q82753","display_name":"Computer file","level":2,"score":0.2881999909877777},{"id":"https://openalex.org/C2984118289","wikidata":"https://www.wikidata.org/wiki/Q29954","display_name":"Power consumption","level":3,"score":0.2809000015258789},{"id":"https://openalex.org/C97250363","wikidata":"https://www.wikidata.org/wiki/Q235557","display_name":"File format","level":2,"score":0.27250000834465027},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.26910001039505005},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.26330000162124634},{"id":"https://openalex.org/C47487241","wikidata":"https://www.wikidata.org/wiki/Q5227230","display_name":"Data access","level":2,"score":0.2628999948501587},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.2587999999523163},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.25859999656677246},{"id":"https://openalex.org/C180198813","wikidata":"https://www.wikidata.org/wiki/Q121182","display_name":"Information system","level":2,"score":0.2538999915122986},{"id":"https://openalex.org/C111873713","wikidata":"https://www.wikidata.org/wiki/Q1641413","display_name":"Job scheduler","level":3,"score":0.25380000472068787}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:cris.unibo.it:11585/1013491","is_oa":true,"landing_page_url":"https://zenodo.org/records/11467483","pdf_url":null,"source":{"id":"https://openalex.org/S4306402579","display_name":"Archivio istituzionale della ricerca (Alma Mater Studiorum Universit\u00e0 di Bologna)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210117483","host_organization_name":"Istituto di Ematologia di Bologna","host_organization_lineage":["https://openalex.org/I4210117483"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/other"},{"id":"doi:10.5281/zenodo.11467483","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.11467483","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"dataset"}],"best_oa_location":{"id":"pmh:oai:cris.unibo.it:11585/1013491","is_oa":true,"landing_page_url":"https://zenodo.org/records/11467483","pdf_url":null,"source":{"id":"https://openalex.org/S4306402579","display_name":"Archivio istituzionale della ricerca (Alma Mater Studiorum Universit\u00e0 di Bologna)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210117483","host_organization_name":"Istituto di Ematologia di Bologna","host_organization_lineage":["https://openalex.org/I4210117483"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/other"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/8","display_name":"Decent work and economic growth","score":0.5588262677192688}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"F-DATA":[0,124,141,169],"is":[1,91,142,176],"a":[2,61,94,129,220],"novel":[3],"workload":[4],"dataset":[5,119],"containing":[6,151],"the":[7,19,76,118,123,139,152,155,159,163,186,202,216],"data":[8,31,81,111,153],"of":[9,22,36,63,70,131,144,154,162,168],"around":[10],"24":[11],"million":[12],"jobs":[13,156],"executed":[14],"on":[15,93,135],"Supercomputer":[16],"Fugaku,":[17],"over":[18],"three":[20],"years":[21],"public":[23],"system":[24],"usage":[25],"(March":[26],"2021-April":[27],"2024).":[28],"Each":[29],"job":[30,64,104],"contains":[32],"an":[33],"extensive":[34],"set":[35],"features,":[37],"such":[38,180],"as":[39,172,182,212],"exit":[40],"code,":[41],"duration,":[42],"power":[43],"consumption":[44],"and":[45,54,86,99,133,247,250,253,256],"performance":[46],"metrics":[47],"(e.g.":[48],"#flops,":[49],"memory":[50],"bandwidth,":[51],"operational":[52],"intensity":[53],"memory/compute":[55],"bound":[56],"label),":[57],"which":[58],"allows":[59],"for":[60,106,238],"multitude":[62],"characteristics":[65],"prediction.":[66],"The":[67,79,89,113,166],"full":[68],"list":[69],"features":[71],"can":[72,198],"be":[73,199],"found":[74],"in":[75,84,122,158,219,242],"file":[77,150,197,218],"feature_list.csv.":[78],"sensitive":[80,101],"appears":[82],"both":[83],"anonymized":[85],"encoded":[87],"versions.":[88],"encoding":[90],"based":[92],"Natural":[95],"Language":[96],"Processing":[97],"model":[98],"retains":[100],"but":[102],"useful":[103],"information":[105],"prediction":[107],"purposes,":[108],"without":[109],"violating":[110],"privacy.":[112],"scripts":[114],"used":[115],"to":[116,137,178],"generate":[117],"are":[120,170],"available":[121],"GitHub":[125],"repository,":[126],"along":[127],"with":[128,147,201],"series":[130],"plots":[132],"instruction":[134],"how":[136],"load":[138,179],"data.":[140],"composed":[143],"38":[145],"files,":[146],"each":[148],"YY_MM.parquet":[149],"submitted":[157],"month":[160],"MM":[161],"year":[164],"YY.":[165],"files":[167,181],"saved":[171],".parquet":[173],"files.":[174],"It":[175],"possible":[177],"dataframes":[183],"by":[184],"leveraging":[185],"pandas":[187,208,211],"APIs,":[188],"after":[189],"installing":[190],"pyarrow":[191],"(pip":[192],"install":[193],"pyarrow).":[194],"A":[195,234],"single":[196],"read":[200],"following":[203],"Python":[204],"instrcutions:":[205],"#":[206,214],"Importing":[207],"library":[209],"import":[210],"pd":[213],"Read":[215],"21_01.parquet":[217],"dataframe":[221],"format":[222],"df":[223],"=":[224,260],"pd.read_parquet(\"21_01.parquet\")":[225],"df.head()":[226],"Please":[227],"cite":[228],"this":[229],"work":[230],"as:":[231],"@article{antici2025fdata,":[232],"title={F-DATA:":[233],"Fugaku":[235],"Workload":[236],"Dataset":[237],"Job-centric":[239],"Predictive":[240],"Modelling":[241],"HPC":[243],"Systems},":[244],"author={Antici,":[245],"Francesco":[246],"Bartolini,":[248],"Andrea":[249],"Domke,":[251],"Jens":[252],"Kiziltan,":[254],"Zeynep":[255],"Yamamoto,":[257],"Keiji},":[258],"journal":[259],"{Scientific":[261],"Data},":[262],"volume={12},":[263],"pages={1321},":[264],"year={2025},":[265],"publisher={Nature":[266],"Publishing":[267],"Group},":[268],"doi={https://doi.org/10.1038/s41597-025-05633-1}":[269],"}":[270]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
