{"id":"https://openalex.org/W7101394727","doi":"https://doi.org/10.1145/3721462.3770773","title":"xMem: A CPU-Based Approach for Accurate Estimation of GPU Memory in Deep Learning Training Workloads","display_name":"xMem: A CPU-Based Approach for Accurate Estimation of GPU Memory in Deep Learning Training Workloads","publication_year":2025,"publication_date":"2025-12-08","ids":{"openalex":"https://openalex.org/W7101394727","doi":"https://doi.org/10.1145/3721462.3770773"},"language":"en","primary_location":{"id":"doi:10.1145/3721462.3770773","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3721462.3770773","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 26th International Middleware Conference","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3721462.3770773","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Jiabo Shi","orcid":"https://orcid.org/0009-0000-8326-3663"},"institutions":[{"id":"https://openalex.org/I7882870","display_name":"University of Glasgow","ror":"https://ror.org/00vtgdb53","country_code":"GB","type":"education","lineage":["https://openalex.org/I7882870"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Jiabo Shi","raw_affiliation_strings":["University of Glasgow, Glasgow, United Kingdom"],"affiliations":[{"raw_affiliation_string":"University of Glasgow, Glasgow, United Kingdom","institution_ids":["https://openalex.org/I7882870"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Dimitrios Pezaros","orcid":"https://orcid.org/0000-0003-0939-378X"},"institutions":[{"id":"https://openalex.org/I7882870","display_name":"University of Glasgow","ror":"https://ror.org/00vtgdb53","country_code":"GB","type":"education","lineage":["https://openalex.org/I7882870"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Dimitrios Pezaros","raw_affiliation_strings":["University of Glasgow, Glasgow, United Kingdom"],"affiliations":[{"raw_affiliation_string":"University of Glasgow, Glasgow, United Kingdom","institution_ids":["https://openalex.org/I7882870"]}]},{"author_position":"last","author":{"id":null,"display_name":"Yehia Elkhatib","orcid":"https://orcid.org/0000-0003-4639-436X"},"institutions":[{"id":"https://openalex.org/I7882870","display_name":"University of Glasgow","ror":"https://ror.org/00vtgdb53","country_code":"GB","type":"education","lineage":["https://openalex.org/I7882870"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Yehia Elkhatib","raw_affiliation_strings":["University of Glasgow, Glasgow, United Kingdom"],"affiliations":[{"raw_affiliation_string":"University of Glasgow, Glasgow, United Kingdom","institution_ids":["https://openalex.org/I7882870"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I7882870"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.58973289,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"256","last_page":"269"},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.869700014591217,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.869700014591217,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.028200000524520874,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.019899999722838402,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.6848999857902527},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.5468999743461609},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.4099000096321106},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.38089999556541443},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.3772999942302704},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.3732999861240387},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.335099995136261}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8658999800682068},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.6848999857902527},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.5468999743461609},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.49480000138282776},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4399999976158142},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.4099000096321106},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.40119999647140503},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.38089999556541443},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3772999942302704},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.3732999861240387},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.34619998931884766},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.335099995136261},{"id":"https://openalex.org/C12186640","wikidata":"https://www.wikidata.org/wiki/Q6815743","display_name":"Memory model","level":3,"score":0.3089999854564667},{"id":"https://openalex.org/C162262903","wikidata":"https://www.wikidata.org/wiki/Q343527","display_name":"Allocator","level":2,"score":0.29660001397132874},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.29499998688697815},{"id":"https://openalex.org/C96250715","wikidata":"https://www.wikidata.org/wiki/Q965330","display_name":"Estimation","level":2,"score":0.2856000065803528},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.27079999446868896},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.2687000036239624},{"id":"https://openalex.org/C137981799","wikidata":"https://www.wikidata.org/wiki/Q1369184","display_name":"Reusability","level":3,"score":0.2685999870300293},{"id":"https://openalex.org/C96324660","wikidata":"https://www.wikidata.org/wiki/Q205446","display_name":"Dataflow","level":2,"score":0.266400009393692},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.2587999999523163},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.2587999999523163}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/3721462.3770773","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3721462.3770773","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 26th International Middleware Conference","raw_type":"proceedings-article"},{"id":"pmh:oai:eprints.gla.ac.uk:367718","is_oa":true,"landing_page_url":"https://eprints.gla.ac.uk/view/author/73217.html>,","pdf_url":null,"source":{"id":"https://openalex.org/S4210235606","display_name":"ENLIGHTEN (Jurnal Bimbingan dan Konseling Islam)","issn_l":"2622-8912","issn":["2622-8912","2622-8920"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":null,"raw_type":"PeerReviewed"},{"id":"pmh:oai:arXiv.org:2510.21048","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2510.21048","pdf_url":"https://arxiv.org/pdf/2510.21048","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"doi:10.1145/3721462.3770773","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3721462.3770773","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 26th International Middleware Conference","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0,155],"global":[1],"scarcity":[2],"of":[3,19,136,157,183],"GPUs":[4],"necessitates":[5],"more":[6],"sophisticated":[7],"strategies":[8],"for":[9],"Deep":[10],"Learning":[11],"jobs":[12],"in":[13,89,213],"shared":[14],"cluster":[15],"environments.":[16],"Accurate":[17],"estimation":[18,48,184],"how":[20],"much":[21],"GPU":[22,35,72,101,126],"memory":[23,93,96,127,214],"a":[24,114,129,133,210],"job":[25],"will":[26],"require":[27,80],"is":[28],"fundamental":[29],"to":[30,65,122,209],"enabling":[31],"advanced":[32],"scheduling":[33],"and":[34,43,77,103,153,163,178],"sharing,":[36],"which":[37,160],"helps":[38],"prevent":[39],"out-of-memory":[40],"(OOM)":[41],"errors":[42],"resource":[44],"underutilization.":[45],"However,":[46],"existing":[47],"methods":[49],"have":[50],"limitations.":[51],"Approaches":[52],"relying":[53],"on":[54],"static":[55],"analysis":[56,73,121,156],"or":[57],"historical":[58],"data":[59],"with":[60],"machine":[61],"learning":[62],"often":[63,198],"fail":[64],"accurately":[66,123],"capture":[67],"runtime":[68],"dynamics.":[69],"Furthermore,":[70],"direct":[71],"consumes":[74],"scarce":[75],"resources,":[76],"some":[78],"techniques":[79],"intrusive":[81],"code":[82,105],"modifications.":[83],"Thus,":[84],"the":[85,172,181,194],"key":[86],"challenge":[87],"lies":[88],"precisely":[90],"estimating":[91],"dynamic":[92,120],"requirements,":[94],"including":[95,147],"allocator":[97],"nuances,":[98],"without":[99,202],"consuming":[100],"resources":[102],"non-intrusive":[104],"changes.":[106],"To":[107],"address":[108],"this":[109],"challenge,":[110],"we":[111],"propose":[112],"xMem,":[113],"novel":[115],"framework":[116],"that":[117,193],"leverages":[118],"CPU-only":[119],"estimate":[124],"peak":[125],"requirements":[128],"priori.":[130],"We":[131],"conducted":[132],"thorough":[134],"evaluation":[135],"xMem":[137],"against":[138],"state-of-the-art":[139],"solutions":[140],"using":[141],"workloads":[142],"from":[143],"25":[144],"different":[145],"models,":[146],"architectures":[148],"like":[149],"Convolutional":[150],"Neural":[151],"Networks":[152],"Transformers.":[154],"5209":[158],"runs,":[159],"includes":[161],"ANOVA":[162],"Monte":[164],"Carlo":[165],"results,":[166],"highlights":[167],"xMem's":[168],"benefits:":[169],"it":[170],"decreases":[171],"median":[173],"relative":[174],"error":[175],"by":[176,190],"91%":[177],"significantly":[179],"reduces":[180],"probability":[182],"failure":[185],"as":[186],"safe":[187],"OOM":[188],"thresholds":[189],"75%,":[191],"meaning":[192],"estimated":[195],"value":[196],"can":[197],"be":[199],"used":[200],"directly":[201],"causing":[203],"OOM.":[204],"Ultimately,":[205],"these":[206],"improvements":[207],"lead":[208],"368%":[211],"increase":[212],"conservation":[215],"potential":[216],"over":[217],"current":[218],"solutions.":[219]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-28T00:00:00"}
