{"id":"https://openalex.org/W4416962962","doi":"https://doi.org/10.1109/sbac-pad66369.2025.00028","title":"A Framework for Analytical Performance and Energy Prediction of DL Training on GPUs","display_name":"A Framework for Analytical Performance and Energy Prediction of DL Training on GPUs","publication_year":2025,"publication_date":"2025-10-28","ids":{"openalex":"https://openalex.org/W4416962962","doi":"https://doi.org/10.1109/sbac-pad66369.2025.00028"},"language":"en","primary_location":{"id":"doi:10.1109/sbac-pad66369.2025.00028","is_oa":false,"landing_page_url":"https://doi.org/10.1109/sbac-pad66369.2025.00028","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/SBC 37th International Symposium on Computer Architecture and High Performance Computing (SBAC-PAD)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5092356330","display_name":"Roblex Nana Tchakoute","orcid":"https://orcid.org/0009-0009-8636-8466"},"institutions":[{"id":"https://openalex.org/I4210115519","display_name":"Centre de Recherche en Informatique","ror":"https://ror.org/020cdve92","country_code":"FR","type":"facility","lineage":["https://openalex.org/I190752583","https://openalex.org/I2746051580","https://openalex.org/I4210091621","https://openalex.org/I4210115519","https://openalex.org/I70768539"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Roblex Nana Tchakoute","raw_affiliation_strings":["Mines Paris - PSL,Centre de Recherche en Informatique (CRI),Fontainebleau,France"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Mines Paris - PSL,Centre de Recherche en Informatique (CRI),Fontainebleau,France","institution_ids":["https://openalex.org/I4210115519"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057912061","display_name":"Claude Tadonki","orcid":"https://orcid.org/0000-0003-1194-6400"},"institutions":[{"id":"https://openalex.org/I4210115519","display_name":"Centre de Recherche en Informatique","ror":"https://ror.org/020cdve92","country_code":"FR","type":"facility","lineage":["https://openalex.org/I190752583","https://openalex.org/I2746051580","https://openalex.org/I4210091621","https://openalex.org/I4210115519","https://openalex.org/I70768539"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Claude Tadonki","raw_affiliation_strings":["Mines Paris - PSL,Centre de Recherche en Informatique (CRI),Fontainebleau,France"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Mines Paris - PSL,Centre de Recherche en Informatique (CRI),Fontainebleau,France","institution_ids":["https://openalex.org/I4210115519"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000823984","display_name":"Petr Dokl\u00e1dal","orcid":"https://orcid.org/0000-0002-6502-7461"},"institutions":[{"id":"https://openalex.org/I70768539","display_name":"\u00c9cole Nationale Sup\u00e9rieure des Mines de Paris","ror":"https://ror.org/04y8cs423","country_code":"FR","type":"education","lineage":["https://openalex.org/I190752583","https://openalex.org/I2746051580","https://openalex.org/I70768539"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Petr Dokladal","raw_affiliation_strings":["Mines Paris - PSL,Centre de Morphologie Math&#x00E9;matique (CMM),Fontainebleau,France"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Mines Paris - PSL,Centre de Morphologie Math&#x00E9;matique (CMM),Fontainebleau,France","institution_ids":["https://openalex.org/I70768539"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5073299350","display_name":"Youssef Mesri","orcid":"https://orcid.org/0000-0002-5136-5435"},"institutions":[{"id":"https://openalex.org/I4210094701","display_name":"Centre de Mise en Forme des Mat\u00e9riaux","ror":"https://ror.org/00qm1ye08","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I1294671590","https://openalex.org/I190752583","https://openalex.org/I2746051580","https://openalex.org/I4210091621","https://openalex.org/I4210094701","https://openalex.org/I4210095849","https://openalex.org/I70768539"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Youssef Mesri","raw_affiliation_strings":["Mines Paris - PSL,Centre de Mise en Forme de Mat&#x00E9;riaux (CEMEF),Sophia Antipolis,France"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Mines Paris - PSL,Centre de Mise en Forme de Mat&#x00E9;riaux (CEMEF),Sophia Antipolis,France","institution_ids":["https://openalex.org/I4210094701"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.3772242,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"215","last_page":"226"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.7396000027656555,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.7396000027656555,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.06989999860525131,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.04129999876022339,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/granularity","display_name":"Granularity","score":0.6959999799728394},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6711000204086304},{"id":"https://openalex.org/keywords/flexibility","display_name":"Flexibility (engineering)","score":0.5800999999046326},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.52920001745224},{"id":"https://openalex.org/keywords/workload","display_name":"Workload","score":0.5264999866485596},{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.4765999913215637},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.46320000290870667},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.44029998779296875},{"id":"https://openalex.org/keywords/performance-prediction","display_name":"Performance prediction","score":0.4316999912261963}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7809000015258789},{"id":"https://openalex.org/C177774035","wikidata":"https://www.wikidata.org/wiki/Q1246948","display_name":"Granularity","level":2,"score":0.6959999799728394},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6711000204086304},{"id":"https://openalex.org/C2780598303","wikidata":"https://www.wikidata.org/wiki/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.5800999999046326},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.52920001745224},{"id":"https://openalex.org/C2778476105","wikidata":"https://www.wikidata.org/wiki/Q628539","display_name":"Workload","level":2,"score":0.5264999866485596},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.4765999913215637},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.46320000290870667},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.44029998779296875},{"id":"https://openalex.org/C2777115002","wikidata":"https://www.wikidata.org/wiki/Q7168246","display_name":"Performance prediction","level":2,"score":0.4316999912261963},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.41440001130104065},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.41110000014305115},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.40070000290870667},{"id":"https://openalex.org/C2742236","wikidata":"https://www.wikidata.org/wiki/Q924713","display_name":"Efficient energy use","level":2,"score":0.397599995136261},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.36820000410079956},{"id":"https://openalex.org/C29202148","wikidata":"https://www.wikidata.org/wiki/Q287260","display_name":"Resource allocation","level":2,"score":0.36660000681877136},{"id":"https://openalex.org/C186370098","wikidata":"https://www.wikidata.org/wiki/Q442787","display_name":"Energy (signal processing)","level":2,"score":0.3456000089645386},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.31060001254081726},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.2948000133037567},{"id":"https://openalex.org/C45804977","wikidata":"https://www.wikidata.org/wiki/Q7239673","display_name":"Predictive modelling","level":2,"score":0.29120001196861267},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.2838999927043915},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.2768999934196472},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.2721000015735626},{"id":"https://openalex.org/C2780609101","wikidata":"https://www.wikidata.org/wiki/Q17156588","display_name":"Resource management (computing)","level":2,"score":0.27149999141693115},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.26179999113082886},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.2583000063896179},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.25369998812675476},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2513999938964844},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.250900000333786}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/sbac-pad66369.2025.00028","is_oa":false,"landing_page_url":"https://doi.org/10.1109/sbac-pad66369.2025.00028","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/SBC 37th International Symposium on Computer Architecture and High Performance Computing (SBAC-PAD)","raw_type":"proceedings-article"},{"id":"pmh:oai:HAL:hal-05398496v1","is_oa":false,"landing_page_url":"https://minesparis-psl.hal.science/hal-05398496","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"2025 IEEE/SBC 37th International Symposium on Computer Architecture and High Performance Computing (SBAC-PAD), Oct 2025, Bonito, Brazil. pp.215-226, &#x27E8;10.1109/SBAC-PAD66369.2025.00028&#x27E9;","raw_type":"Conference papers"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W1595159159","https://openalex.org/W1679689118","https://openalex.org/W1961751213","https://openalex.org/W1972384243","https://openalex.org/W1976412024","https://openalex.org/W2002555321","https://openalex.org/W2026927847","https://openalex.org/W2077305093","https://openalex.org/W2082545328","https://openalex.org/W2251939518","https://openalex.org/W2338973660","https://openalex.org/W2554302513","https://openalex.org/W2923014074","https://openalex.org/W3015498000","https://openalex.org/W3024575953","https://openalex.org/W3086781878","https://openalex.org/W3124057498","https://openalex.org/W4206421714","https://openalex.org/W4283704460","https://openalex.org/W4286367840","https://openalex.org/W4287254789","https://openalex.org/W4289827994","https://openalex.org/W4308090454","https://openalex.org/W4317433347","https://openalex.org/W4318603355","https://openalex.org/W4387005417","https://openalex.org/W4390189088","https://openalex.org/W4399450035","https://openalex.org/W4401212524","https://openalex.org/W4404740455","https://openalex.org/W4407196897","https://openalex.org/W4412505644"],"related_works":[],"abstract_inverted_index":{"The":[0,170],"rapid":[1],"scaling":[2,95],"of":[3,58,162],"deep":[4],"learning":[5],"(DL)":[6],"models":[7,128],"raises":[8],"the":[9],"need":[10],"for":[11,55,119,165,168,178],"accurate":[12],"and":[13,23,40,77,116,133,183],"understandable":[14],"performance/energy":[15],"prediction":[16,57],"tools":[17],"to":[18,35,43,45,81,92,110,174],"support":[19],"efficient":[20],"resource":[21],"management":[22],"sustainable":[24],"AI":[25],"development.":[26],"Existing":[27],"modeling":[28],"approaches":[29],"often":[30],"lack":[31],"both":[32],"sufficient":[33],"granularity":[34],"capture":[36,93],"nuanced":[37],"hardware-software":[38],"interactions":[39],"suitable":[41],"flexibility":[42],"adapt":[44],"diverse":[46],"modern":[47],"architectures.":[48],"This":[49],"paper":[50],"introduces":[51],"an":[52,83,102,158],"analytical":[53],"framework":[54,65,171],"time/energy":[56],"DL":[59,127],"training":[60],"workloads":[61],"on":[62,97,137],"GPU.":[63],"Our":[64,122],"integrates":[66],"detailed":[67],"workload":[68],"characterization":[69],"that":[70,149],"includes":[71],"FLOPs,":[72],"memory":[73],"access,":[74],"kernel":[75],"activities,":[76],"novel":[78],"structural":[79],"features":[80],"derive":[82],"architecture-aware":[84],"efficiency":[85],"model,":[86],"which":[87,106],"considers":[88],"a":[89,153],"saturation-based":[90],"function":[91],"dimensional":[94],"effects":[96],"hardware":[98],"utilization.":[99],"We":[100],"propose":[101],"iterative":[103],"refinement":[104],"methodology,":[105],"incorporates":[107],"model-specific":[108],"scalars":[109],"address":[111],"particular":[112],"architectures":[113],"like":[114,135],"ALBERT":[115],"precision-specific":[117],"calibrations":[118],"BF16":[120],"operations.":[121],"benchmark":[123],"with":[124,157],"six":[125],"advanced":[126],"(including":[129],"CNNs,":[130],"BERT-style":[131],"Transformers,":[132],"LLMs":[134],"TinyLlama)":[136],"NVIDIA":[138],"A100":[139],"GPUs":[140],"under":[141],"various":[142],"configurations":[143],"(1/4":[144],"GPUs,":[145],"FP32/TF32/mixed":[146],"BF16)":[147],"shows":[148],"our":[150],"approach":[151],"achieves":[152],"high":[154],"predictive":[155],"accuracy,":[156],"overall":[159],"relative":[160],"error":[161],"4.14%":[163],"(3.05%":[164],"time,":[166],"5.78%":[167],"power).":[169],"is":[172],"intended":[173],"provide":[175],"valuable":[176],"insights":[177],"HPC-AI":[179],"co-design,":[180],"energy-aware":[181],"scheduling,":[182],"performance":[184],"optimization.":[185]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-12-03T00:00:00"}
