{"id":"https://openalex.org/W4408498681","doi":"https://doi.org/10.1631/fitee.2300710","title":"Training large-scale language models with limited GPU memory: a survey","display_name":"Training large-scale language models with limited GPU memory: a survey","publication_year":2025,"publication_date":"2025-03-01","ids":{"openalex":"https://openalex.org/W4408498681","doi":"https://doi.org/10.1631/fitee.2300710"},"language":"en","primary_location":{"id":"doi:10.1631/fitee.2300710","is_oa":false,"landing_page_url":"https://doi.org/10.1631/fitee.2300710","pdf_url":null,"source":{"id":"https://openalex.org/S4210189857","display_name":"Frontiers of Information Technology & Electronic Engineering","issn_l":"2095-9184","issn":["2095-9184","2095-9230"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers of Information Technology &amp; Electronic Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102970145","display_name":"Yu Tang","orcid":"https://orcid.org/0000-0002-8595-1547"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yu Tang","raw_affiliation_strings":["National Key Laboratory of Parallel and Distributed Computing, College of Computer, National University of Defense Technology, Changsha, 410073, China","National Key Laboratory of Parallel and Distributed Computing, College of Computer, National University of Defense Technology, Changsha, China"],"raw_orcid":"https://orcid.org/0000-0002-8595-1547","affiliations":[{"raw_affiliation_string":"National Key Laboratory of Parallel and Distributed Computing, College of Computer, National University of Defense Technology, Changsha, 410073, China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"National Key Laboratory of Parallel and Distributed Computing, College of Computer, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043045076","display_name":"Linbo Qiao","orcid":"https://orcid.org/0000-0002-8285-2738"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Linbo Qiao","raw_affiliation_strings":["National Key Laboratory of Parallel and Distributed Computing, College of Computer, National University of Defense Technology, Changsha, 410073, China","National Key Laboratory of Parallel and Distributed Computing, College of Computer, National University of Defense Technology, Changsha, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National Key Laboratory of Parallel and Distributed Computing, College of Computer, National University of Defense Technology, Changsha, 410073, China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"National Key Laboratory of Parallel and Distributed Computing, College of Computer, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029674340","display_name":"Lujia Yin","orcid":"https://orcid.org/0009-0005-1494-2853"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lujia Yin","raw_affiliation_strings":["National Key Laboratory of Parallel and Distributed Computing, College of Computer, National University of Defense Technology, Changsha, 410073, China","National Key Laboratory of Parallel and Distributed Computing, College of Computer, National University of Defense Technology, Changsha, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National Key Laboratory of Parallel and Distributed Computing, College of Computer, National University of Defense Technology, Changsha, 410073, China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"National Key Laboratory of Parallel and Distributed Computing, College of Computer, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111797596","display_name":"Peng Liang","orcid":null},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Peng Liang","raw_affiliation_strings":["National Key Laboratory of Parallel and Distributed Computing, College of Computer, National University of Defense Technology, Changsha, 410073, China","National Key Laboratory of Parallel and Distributed Computing, College of Computer, National University of Defense Technology, Changsha, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National Key Laboratory of Parallel and Distributed Computing, College of Computer, National University of Defense Technology, Changsha, 410073, China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"National Key Laboratory of Parallel and Distributed Computing, College of Computer, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077575204","display_name":"Ao Shen","orcid":"https://orcid.org/0000-0002-2157-4375"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ao Shen","raw_affiliation_strings":["National Key Laboratory of Parallel and Distributed Computing, College of Computer, National University of Defense Technology, Changsha, 410073, China","National Key Laboratory of Parallel and Distributed Computing, College of Computer, National University of Defense Technology, Changsha, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National Key Laboratory of Parallel and Distributed Computing, College of Computer, National University of Defense Technology, Changsha, 410073, China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"National Key Laboratory of Parallel and Distributed Computing, College of Computer, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082769820","display_name":"Zhilin Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhilin Yang","raw_affiliation_strings":["National Key Laboratory of Parallel and Distributed Computing, College of Computer, National University of Defense Technology, Changsha, 410073, China","National Key Laboratory of Parallel and Distributed Computing, College of Computer, National University of Defense Technology, Changsha, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National Key Laboratory of Parallel and Distributed Computing, College of Computer, National University of Defense Technology, Changsha, 410073, China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"National Key Laboratory of Parallel and Distributed Computing, College of Computer, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100663892","display_name":"Lizhi Zhang","orcid":"https://orcid.org/0000-0001-6727-1962"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lizhi Zhang","raw_affiliation_strings":["National Key Laboratory of Parallel and Distributed Computing, College of Computer, National University of Defense Technology, Changsha, 410073, China","National Key Laboratory of Parallel and Distributed Computing, College of Computer, National University of Defense Technology, Changsha, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National Key Laboratory of Parallel and Distributed Computing, College of Computer, National University of Defense Technology, Changsha, 410073, China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"National Key Laboratory of Parallel and Distributed Computing, College of Computer, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100440903","display_name":"Dongsheng Li","orcid":"https://orcid.org/0000-0001-9743-2034"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dongsheng Li","raw_affiliation_strings":["National Key Laboratory of Parallel and Distributed Computing, College of Computer, National University of Defense Technology, Changsha, 410073, China","National Key Laboratory of Parallel and Distributed Computing, College of Computer, National University of Defense Technology, Changsha, China"],"raw_orcid":"https://orcid.org/0000-0001-9743-2034","affiliations":[{"raw_affiliation_string":"National Key Laboratory of Parallel and Distributed Computing, College of Computer, National University of Defense Technology, Changsha, 410073, China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"National Key Laboratory of Parallel and Distributed Computing, College of Computer, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5102970145"],"corresponding_institution_ids":["https://openalex.org/I170215575"],"apc_list":null,"apc_paid":null,"fwci":4.3465,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.93392628,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":98},"biblio":{"volume":"26","issue":"3","first_page":"309","last_page":"331"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6652463674545288},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.53118896484375},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.4268679618835449},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.4191809594631195},{"id":"https://openalex.org/keywords/cartography","display_name":"Cartography","score":0.06503018736839294},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.05666932463645935}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6652463674545288},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.53118896484375},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.4268679618835449},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.4191809594631195},{"id":"https://openalex.org/C58640448","wikidata":"https://www.wikidata.org/wiki/Q42515","display_name":"Cartography","level":1,"score":0.06503018736839294},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.05666932463645935},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1631/fitee.2300710","is_oa":false,"landing_page_url":"https://doi.org/10.1631/fitee.2300710","pdf_url":null,"source":{"id":"https://openalex.org/S4210189857","display_name":"Frontiers of Information Technology & Electronic Engineering","issn_l":"2095-9184","issn":["2095-9184","2095-9230"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers of Information Technology &amp; Electronic Engineering","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":115,"referenced_works":["https://openalex.org/W1548885290","https://openalex.org/W1659842140","https://openalex.org/W1965869859","https://openalex.org/W1979566015","https://openalex.org/W1983364832","https://openalex.org/W2031213082","https://openalex.org/W2034996255","https://openalex.org/W2039189705","https://openalex.org/W2070996757","https://openalex.org/W2102003408","https://openalex.org/W2163605009","https://openalex.org/W2186615578","https://openalex.org/W2194775991","https://openalex.org/W2338908902","https://openalex.org/W2469490737","https://openalex.org/W2489529491","https://openalex.org/W2763421725","https://openalex.org/W2796649226","https://openalex.org/W2797328513","https://openalex.org/W2867345499","https://openalex.org/W2884150179","https://openalex.org/W2884700152","https://openalex.org/W2919115771","https://openalex.org/W2938830017","https://openalex.org/W2963159690","https://openalex.org/W2963341956","https://openalex.org/W2963748441","https://openalex.org/W2964199361","https://openalex.org/W2969210150","https://openalex.org/W2969388332","https://openalex.org/W2973727699","https://openalex.org/W2985738161","https://openalex.org/W2990896553","https://openalex.org/W3010830594","https://openalex.org/W3011574394","https://openalex.org/W3012479151","https://openalex.org/W3012514909","https://openalex.org/W3020605687","https://openalex.org/W3044384472","https://openalex.org/W3081168214","https://openalex.org/W3086105743","https://openalex.org/W3088409176","https://openalex.org/W3119866685","https://openalex.org/W3129831491","https://openalex.org/W3130554079","https://openalex.org/W3132107458","https://openalex.org/W3138516171","https://openalex.org/W3158146252","https://openalex.org/W3158631574","https://openalex.org/W3169936356","https://openalex.org/W3182414949","https://openalex.org/W3187018546","https://openalex.org/W3189259198","https://openalex.org/W3204105967","https://openalex.org/W3205803342","https://openalex.org/W3206832494","https://openalex.org/W3210871626","https://openalex.org/W4205390421","https://openalex.org/W4213019189","https://openalex.org/W4214658871","https://openalex.org/W4220838824","https://openalex.org/W4224211968","https://openalex.org/W4224330420","https://openalex.org/W4225823287","https://openalex.org/W4226265968","https://openalex.org/W4287168993","https://openalex.org/W4289533987","https://openalex.org/W4297813615","https://openalex.org/W4301183483","https://openalex.org/W4301239768","https://openalex.org/W4301361180","https://openalex.org/W4317464085","https://openalex.org/W4317940150","https://openalex.org/W4322775461","https://openalex.org/W4327810158","https://openalex.org/W4384648639","https://openalex.org/W4385572156","https://openalex.org/W4385968090","https://openalex.org/W4386348101","https://openalex.org/W4386709668","https://openalex.org/W4386768656","https://openalex.org/W4387356156","https://openalex.org/W4393145114","https://openalex.org/W6600003358","https://openalex.org/W6600120041","https://openalex.org/W6600157417","https://openalex.org/W6600171454","https://openalex.org/W6600194071","https://openalex.org/W6600213211","https://openalex.org/W6600225990","https://openalex.org/W6600339963","https://openalex.org/W6600466347","https://openalex.org/W6600662749","https://openalex.org/W6600680261","https://openalex.org/W6600804061","https://openalex.org/W6601323341","https://openalex.org/W6601517772","https://openalex.org/W6602480875","https://openalex.org/W6602610147","https://openalex.org/W6602728322","https://openalex.org/W6603103775","https://openalex.org/W6603944243","https://openalex.org/W6604529977","https://openalex.org/W6605821484","https://openalex.org/W6605862987","https://openalex.org/W6608277728","https://openalex.org/W6609140779","https://openalex.org/W6609645346","https://openalex.org/W6631190155","https://openalex.org/W6638599789","https://openalex.org/W6679436768","https://openalex.org/W6743235451","https://openalex.org/W6771626834","https://openalex.org/W6773732629","https://openalex.org/W6829887170"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W230091440","https://openalex.org/W2390279801","https://openalex.org/W2233261550","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2810751659"],"abstract_inverted_index":{"Large-scale":[0],"models":[1,35,58,160],"have":[2],"gained":[3],"significant":[4],"attention":[5],"in":[6,31,122,135,156],"a":[7,28,51,142],"wide":[8],"range":[9],"of":[10,41,75,98,119],"fields,":[11],"such":[12],"as":[13,141],"computer":[14],"vision":[15],"and":[16,87,133,147,154],"natural":[17],"language":[18,125,159],"processing,":[19],"due":[20],"to":[21,72],"their":[22],"effectiveness":[23],"across":[24],"various":[25],"applications.":[26],"However,":[27],"notable":[29],"hurdle":[30],"training":[32,56,80,123,157],"these":[33,105],"large-scale":[34,57,124,158],"is":[36],"the":[37,68,73,79,99,109,117,128,152],"limited":[38,60,162],"memory":[39,77,120],"capacity":[40],"graphics":[42],"processing":[43],"units":[44],"(GPUs).":[45],"In":[46],"this":[47,91,136],"paper,":[48],"we":[49,93],"present":[50,94],"comprehensive":[52],"survey":[53,139],"focused":[54],"on":[55,116,150],"with":[59,161],"GPU":[61,76,163],"memory.":[62,164],"The":[63],"exploration":[64],"commences":[65],"by":[66,112],"scrutinizing":[67],"factors":[69],"that":[70,103],"contribute":[71],"consumption":[74],"during":[78],"process,":[81],"namely":[82],"model":[83,85,88],"parameters,":[84],"states,":[86],"activations.":[89],"Following":[90],"analysis,":[92],"an":[95,114],"in-depth":[96],"overview":[97],"relevant":[100],"research":[101,132],"work":[102],"addresses":[104],"aspects":[106],"individually.":[107],"Finally,":[108],"paper":[110],"concludes":[111],"presenting":[113],"outlook":[115],"future":[118],"optimization":[121],"models,":[126],"emphasizing":[127],"necessity":[129],"for":[130,145],"continued":[131],"innovation":[134],"area.":[137],"This":[138],"serves":[140],"valuable":[143],"resource":[144],"researchers":[146],"practitioners":[148],"keen":[149],"comprehending":[151],"challenges":[153],"advancements":[155]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1}],"updated_date":"2026-01-23T23:20:30.427331","created_date":"2025-10-10T00:00:00"}
