{"id":"https://openalex.org/W4394969374","doi":"https://doi.org/10.1145/3642970.3655827","title":"An Analysis of Collocation on GPUs for Deep Learning Training","display_name":"An Analysis of Collocation on GPUs for Deep Learning Training","publication_year":2024,"publication_date":"2024-04-19","ids":{"openalex":"https://openalex.org/W4394969374","doi":"https://doi.org/10.1145/3642970.3655827"},"language":"en","primary_location":{"id":"doi:10.1145/3642970.3655827","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3642970.3655827","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 4th Workshop on Machine Learning and Systems","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://itu-dasyalab.github.io/RAD/publication/papers/collocation_analysisi_euromlsys2024.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5006238663","display_name":"Ties Robroek","orcid":"https://orcid.org/0009-0006-3451-5602"},"institutions":[{"id":"https://openalex.org/I83467386","display_name":"IT University of Copenhagen","ror":"https://ror.org/02309jg23","country_code":"DK","type":"education","lineage":["https://openalex.org/I83467386"]}],"countries":["DK"],"is_corresponding":false,"raw_author_name":"Ties Robroek","raw_affiliation_strings":["IT University of Copenhagen"],"raw_orcid":"https://orcid.org/0009-0006-3451-5602","affiliations":[{"raw_affiliation_string":"IT University of Copenhagen","institution_ids":["https://openalex.org/I83467386"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046504176","display_name":"Ehsan Yousefzadeh-Asl-Miandoab","orcid":"https://orcid.org/0000-0003-0156-1435"},"institutions":[{"id":"https://openalex.org/I83467386","display_name":"IT University of Copenhagen","ror":"https://ror.org/02309jg23","country_code":"DK","type":"education","lineage":["https://openalex.org/I83467386"]}],"countries":["DK"],"is_corresponding":false,"raw_author_name":"Ehsan Yousefzadeh-Asl-Miandoab","raw_affiliation_strings":["IT University of Copenhagen"],"raw_orcid":"https://orcid.org/0000-0003-0156-1435","affiliations":[{"raw_affiliation_string":"IT University of Copenhagen","institution_ids":["https://openalex.org/I83467386"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5061990490","display_name":"P\u0131nar T\u00f6z\u00fcn","orcid":"https://orcid.org/0000-0001-6838-4854"},"institutions":[{"id":"https://openalex.org/I83467386","display_name":"IT University of Copenhagen","ror":"https://ror.org/02309jg23","country_code":"DK","type":"education","lineage":["https://openalex.org/I83467386"]}],"countries":["DK"],"is_corresponding":false,"raw_author_name":"P\u0131nar T\u00f6z\u00fcn","raw_affiliation_strings":["IT University of Copenhagen"],"raw_orcid":"https://orcid.org/0000-0001-6838-4854","affiliations":[{"raw_affiliation_string":"IT University of Copenhagen","institution_ids":["https://openalex.org/I83467386"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":3.0206,"has_fulltext":true,"cited_by_count":15,"citation_normalized_percentile":{"value":0.923238,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"81","last_page":"90"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9918000102043152,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9873999953269958,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7531534433364868},{"id":"https://openalex.org/keywords/collocation","display_name":"Collocation (remote sensing)","score":0.7143445611000061},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.5929911732673645},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.5704570412635803},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5074252486228943},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.49429941177368164},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.3553972840309143},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.2353937327861786}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7531534433364868},{"id":"https://openalex.org/C80023036","wikidata":"https://www.wikidata.org/wiki/Q5147531","display_name":"Collocation (remote sensing)","level":2,"score":0.7143445611000061},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.5929911732673645},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.5704570412635803},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5074252486228943},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.49429941177368164},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.3553972840309143},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2353937327861786},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3642970.3655827","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3642970.3655827","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 4th Workshop on Machine Learning and Systems","raw_type":"proceedings-article"},{"id":"pmh:oai:pure.atira.dk:publications/8b5e5d8e-ab9e-4d7b-90f5-806f4ae31520","is_oa":true,"landing_page_url":"https://pure.itu.dk/portal/da/publications/8b5e5d8e-ab9e-4d7b-90f5-806f4ae31520","pdf_url":"https://itu-dasyalab.github.io/RAD/publication/papers/collocation_analysisi_euromlsys2024.pdf","source":{"id":"https://openalex.org/S4377196680","display_name":"IT University Of Copenhagen (IT University of Copenhagen)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I83467386","host_organization_name":"IT University of Copenhagen","host_organization_lineage":["https://openalex.org/I83467386"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Robroek, T T, Yousefzadeh-Asl-Miandoab, E & T\u00f6z\u00fcn, P 2024, An Analysis of Collocation on GPUs for Deep Learning Training. in Proceedings of the 4th Workshop on Machine Learning and Systems, EuroMLSys 2024, Athens, Greece, 22 April 2024. Association for Computing Machinery, pp. 81-90. https://doi.org/10.1145/3642970.3655827","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":{"id":"pmh:oai:pure.atira.dk:publications/8b5e5d8e-ab9e-4d7b-90f5-806f4ae31520","is_oa":true,"landing_page_url":"https://pure.itu.dk/portal/da/publications/8b5e5d8e-ab9e-4d7b-90f5-806f4ae31520","pdf_url":"https://itu-dasyalab.github.io/RAD/publication/papers/collocation_analysisi_euromlsys2024.pdf","source":{"id":"https://openalex.org/S4377196680","display_name":"IT University Of Copenhagen (IT University of Copenhagen)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I83467386","host_organization_name":"IT University of Copenhagen","host_organization_lineage":["https://openalex.org/I83467386"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Robroek, T T, Yousefzadeh-Asl-Miandoab, E & T\u00f6z\u00fcn, P 2024, An Analysis of Collocation on GPUs for Deep Learning Training. in Proceedings of the 4th Workshop on Machine Learning and Systems, EuroMLSys 2024, Athens, Greece, 22 April 2024. Association for Computing Machinery, pp. 81-90. https://doi.org/10.1145/3642970.3655827","raw_type":"info:eu-repo/semantics/publishedVersion"},"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","score":0.4099999964237213,"id":"https://metadata.un.org/sdg/9"}],"awards":[{"id":"https://openalex.org/G5966697789","display_name":null,"funder_award_id":"0171-00061B","funder_id":"https://openalex.org/F4320323817","funder_display_name":"Universitas Brawijaya"}],"funders":[{"id":"https://openalex.org/F4320323817","display_name":"Universitas Brawijaya","ror":"https://ror.org/01wk3d929"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4394969374.pdf","grobid_xml":"https://content.openalex.org/works/W4394969374.grobid-xml"},"referenced_works_count":23,"referenced_works":["https://openalex.org/W2117539524","https://openalex.org/W2160121678","https://openalex.org/W2323693848","https://openalex.org/W2409247164","https://openalex.org/W2510980549","https://openalex.org/W2625231790","https://openalex.org/W2794729807","https://openalex.org/W2891285910","https://openalex.org/W2964293511","https://openalex.org/W2970139027","https://openalex.org/W2970971581","https://openalex.org/W3128271665","https://openalex.org/W3137350414","https://openalex.org/W3157306683","https://openalex.org/W4214634256","https://openalex.org/W4221109973","https://openalex.org/W4237249260","https://openalex.org/W4243035950","https://openalex.org/W4295312788","https://openalex.org/W4304192541","https://openalex.org/W4368353224","https://openalex.org/W4380925477","https://openalex.org/W4394969374"],"related_works":["https://openalex.org/W2370840338","https://openalex.org/W2388641108","https://openalex.org/W2368317224","https://openalex.org/W2370651559","https://openalex.org/W230091440","https://openalex.org/W2383177152","https://openalex.org/W3000893075","https://openalex.org/W2349694428","https://openalex.org/W2384630579","https://openalex.org/W2358919722"],"abstract_inverted_index":{"Deep":[0],"learning":[1],"training":[2,15,68,79,153],"is":[3],"an":[4],"expensive":[5],"process":[6],"that":[7,64],"extensively":[8],"uses":[9],"GPUs.":[10,19],"However,":[11],"not":[12],"all":[13],"model":[14,67],"saturates":[16],"modern":[17],"powerful":[18],"To":[20],"create":[21],"guidelines":[22],"for":[23,148],"such":[24],"cases,":[25],"this":[26],"paper":[27],"examines":[28],"the":[29,32,45,57,86,89,97,104,111,140],"performance":[30],"of":[31,96,110,146],"different":[33],"collocation":[34,147],"methods":[35],"available":[36,105],"on":[37,44],"NVIDIA":[38],"GPUs:":[39],"na\u00efvely":[40],"submitting":[41,152],"multiple":[42,49,66],"processes":[43],"same":[46],"GPU":[47,59,127],"using":[48],"streams,":[50],"utilizing":[51],"Multi-Process":[52],"Service":[53],"(MPS),":[54],"and":[55,93,107,142],"enabling":[56],"Multi-Instance":[58],"(MIG).":[60],"Our":[61],"results":[62],"demonstrate":[63],"collocating":[65],"runs":[69],"yields":[70],"significant":[71],"benefits,":[72],"leading":[73],"to":[74,76,118],"up":[75],"three":[77],"times":[78],"throughput":[80],"despite":[81],"increased":[82],"epoch":[83],"time.":[84],"On":[85],"other":[87],"hand,":[88],"aggregate":[90],"memory":[91,106],"footprint":[92],"compute":[94,108],"needs":[95],"models":[98],"trained":[99],"in":[100],"parallel":[101],"must":[102],"fit":[103],"resources":[109],"GPU.":[112],"MIG":[113],"can":[114,123],"be":[115],"beneficial":[116],"thanks":[117],"its":[119],"interference-free":[120],"partitioning":[121],"but":[122],"suffer":[124],"from":[125],"sub-optimal":[126],"utilization":[128],"with":[129],"dynamic":[130],"or":[131],"mixed":[132],"workloads.":[133],"In":[134],"general,":[135],"we":[136],"recommend":[137],"MPS":[138],"as":[139],"best-performing":[141],"most":[143],"flexible":[144],"form":[145],"a":[149],"single":[150],"user":[151],"jobs.":[154]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":1}],"updated_date":"2026-06-19T17:40:00.097472","created_date":"2025-10-10T00:00:00"}
