{"id":"https://openalex.org/W4411551698","doi":"https://doi.org/10.1109/cscwd64889.2025.11033565","title":"Efficient Training of Large Language Models on Legacy GPUs with HetSeq and PyTorch","display_name":"Efficient Training of Large Language Models on Legacy GPUs with HetSeq and PyTorch","publication_year":2025,"publication_date":"2025-05-05","ids":{"openalex":"https://openalex.org/W4411551698","doi":"https://doi.org/10.1109/cscwd64889.2025.11033565"},"language":"en","primary_location":{"id":"doi:10.1109/cscwd64889.2025.11033565","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cscwd64889.2025.11033565","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 28th International Conference on Computer Supported Cooperative Work in Design (CSCWD)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5118596134","display_name":"Bruno Leite Franco","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Bruno Leite Franco","raw_affiliation_strings":["Graduate Program in Applied Informatics Pontif&#x00ED;cia Universidade Cat&#x00F3; lica do Paran&#x00E1;,Curitiba,PR,Brazil"],"affiliations":[{"raw_affiliation_string":"Graduate Program in Applied Informatics Pontif&#x00ED;cia Universidade Cat&#x00F3; lica do Paran&#x00E1;,Curitiba,PR,Brazil","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5082099991","display_name":"Edson Em\u00edlio Scalabrin","orcid":"https://orcid.org/0000-0002-3918-1799"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Edson Em\u00edlio Scalabrin","raw_affiliation_strings":["Graduate Program in Applied Informatics Pontif&#x00ED;cia Universidade Cat&#x00F3; lica do Paran&#x00E1;,Curitiba,PR,Brazil"],"affiliations":[{"raw_affiliation_string":"Graduate Program in Applied Informatics Pontif&#x00ED;cia Universidade Cat&#x00F3; lica do Paran&#x00E1;,Curitiba,PR,Brazil","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5118596134"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0784616,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1526","last_page":"1530"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9850999712944031,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9850999712944031,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9168999791145325,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9036999940872192,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7968968152999878},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.5233344435691833},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5001041889190674},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.48790356516838074},{"id":"https://openalex.org/keywords/general-purpose-computing-on-graphics-processing-units","display_name":"General-purpose computing on graphics processing units","score":0.42973387241363525},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.3804377019405365},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.17302781343460083},{"id":"https://openalex.org/keywords/graphics","display_name":"Graphics","score":0.14218604564666748}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7968968152999878},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.5233344435691833},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5001041889190674},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48790356516838074},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.42973387241363525},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.3804377019405365},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.17302781343460083},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.14218604564666748},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cscwd64889.2025.11033565","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cscwd64889.2025.11033565","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 28th International Conference on Computer Supported Cooperative Work in Design (CSCWD)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":8,"referenced_works":["https://openalex.org/W2007339694","https://openalex.org/W2170901118","https://openalex.org/W3173623873","https://openalex.org/W3205838256","https://openalex.org/W4206136559","https://openalex.org/W4248546022","https://openalex.org/W4403318601","https://openalex.org/W6779124799"],"related_works":["https://openalex.org/W2505380084","https://openalex.org/W230091440","https://openalex.org/W4400333498","https://openalex.org/W2233261550","https://openalex.org/W2810751659","https://openalex.org/W258997015","https://openalex.org/W2997094352","https://openalex.org/W2086739451","https://openalex.org/W3183233360","https://openalex.org/W1980160788"],"abstract_inverted_index":{"This":[0],"study":[1,30],"investigates":[2],"the":[3,42,56,67,108,113,120,124,172],"effectiveness":[4],"and":[5,39,48,55,87,116,133,185],"feasibility":[6],"of":[7,13,44,69,102,131,171],"using":[8,164],"parallel":[9],"machines":[10],"with":[11,51],"GPUs":[12,147],"different":[14],"capacities":[15],"to":[16,25,60,155],"train":[17],"large":[18],"language":[19],"models":[20],"(LLMs)":[21],"as":[22,145,160],"an":[23,99],"alternative":[24],"costly":[26],"cloud":[27],"platforms.":[28],"The":[29,90],"explores":[31],"optimization":[32],"techniques":[33],"focused":[34],"on":[35],"data,":[36],"models,":[37],"budget,":[38],"systems,":[40,65],"detailing":[41],"configuration":[43,92,115,122],"a":[45,94,129],"multi-GPU":[46],"architecture":[47],"its":[49],"implementation":[50],"CUDA":[52],"software,":[53],"PyTorch,":[54],"HetSeq":[57,88,91],"library,":[58],"adapted":[59],"maximize":[61],"performance":[62,76,96],"in":[63,123,141,188],"heterogeneous":[64,85,121,180],"including":[66],"usage":[68],"legacy":[70],"resources":[71],"(e.g.,":[72],"older":[73],"GPUs).":[74],"In":[75],"tests,":[77],"three":[78],"approaches":[79,152],"are":[80,148,182],"compared:":[81],"homogeneous":[82,114,194],"load":[83],"distribution,":[84,86],"usage.":[89],"yielded":[93],"substantial":[95],"improvement,":[97],"achieving":[98],"execution":[100],"time":[101,144],"129,759":[103],"seconds,":[104],"significantly":[105],"lower":[106],"than":[107],"153,175":[109],"seconds":[110,118],"observed":[111],"for":[112,119],"165,387":[117],"maximum":[125],"setup":[126],"tested,":[127],"representing":[128],"reduction":[130],"15%":[132],"21%,":[134],"respectively.":[135],"These":[136],"results":[137],"highlight":[138],"HetSeq's":[139],"advantage":[140],"optimizing":[142],"training":[143,173],"more":[146],"added,":[149],"outperforming":[150],"traditional":[151],"that":[153,168,178,190],"attempt":[154],"standardize":[156],"hardware":[157],"utilization,":[158],"such":[159],"PyTorch.":[161],"Further":[162],"analysis":[163],"Amdahl's":[165],"Law":[166],"reveals":[167],"approximately":[169],"82%":[170],"process":[174],"is":[175],"parallelizable,":[176],"underscoring":[177],"optimized":[179],"architectures":[181],"both":[183],"viable":[184],"economically":[186],"advantageous":[187],"scenarios":[189],"do":[191],"not":[192],"require":[193],"hardware.":[195]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
