{"id":"https://openalex.org/W4404788057","doi":"https://doi.org/10.1145/3652892.3700758","title":"Menos: Split Fine-Tuning Large Language Models with Efficient GPU Memory Sharing","display_name":"Menos: Split Fine-Tuning Large Language Models with Efficient GPU Memory Sharing","publication_year":2024,"publication_date":"2024-11-27","ids":{"openalex":"https://openalex.org/W4404788057","doi":"https://doi.org/10.1145/3652892.3700758"},"language":"en","primary_location":{"id":"doi:10.1145/3652892.3700758","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3652892.3700758","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 25th International Middleware Conference","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103922304","display_name":"Chuntian Hu","orcid":"https://orcid.org/0009-0003-0653-2817"},"institutions":[{"id":"https://openalex.org/I185261750","display_name":"University of Toronto","ror":"https://ror.org/03dbr7087","country_code":"CA","type":"education","lineage":["https://openalex.org/I185261750"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Chenghao Hu","raw_affiliation_strings":["University of Toronto, Toronto, Canada"],"raw_orcid":"https://orcid.org/0009-0003-0653-2817","affiliations":[{"raw_affiliation_string":"University of Toronto, Toronto, Canada","institution_ids":["https://openalex.org/I185261750"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5083596391","display_name":"Baochun Li","orcid":"https://orcid.org/0000-0003-2404-0974"},"institutions":[{"id":"https://openalex.org/I185261750","display_name":"University of Toronto","ror":"https://ror.org/03dbr7087","country_code":"CA","type":"education","lineage":["https://openalex.org/I185261750"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Baochun Li","raw_affiliation_strings":["University of Toronto, Toronto, Canada"],"raw_orcid":"https://orcid.org/0000-0003-2404-0974","affiliations":[{"raw_affiliation_string":"University of Toronto, Toronto, Canada","institution_ids":["https://openalex.org/I185261750"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5103922304"],"corresponding_institution_ids":["https://openalex.org/I185261750"],"apc_list":null,"apc_paid":null,"fwci":0.3311,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.68210759,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"185","last_page":"198"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.9908999800682068,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.9907000064849854,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.774543285369873},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6529456377029419},{"id":"https://openalex.org/keywords/fine-tuning","display_name":"Fine-tuning","score":0.42998605966567993},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.365484356880188},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.3256797194480896},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.0725107192993164}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.774543285369873},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6529456377029419},{"id":"https://openalex.org/C157524613","wikidata":"https://www.wikidata.org/wiki/Q2828883","display_name":"Fine-tuning","level":2,"score":0.42998605966567993},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.365484356880188},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.3256797194480896},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0725107192993164},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3652892.3700758","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3652892.3700758","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 25th International Middleware Conference","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","score":0.550000011920929,"display_name":"Affordable and clean energy"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":15,"referenced_works":["https://openalex.org/W2112168774","https://openalex.org/W2768663569","https://openalex.org/W2963209930","https://openalex.org/W2982899198","https://openalex.org/W3018102029","https://openalex.org/W3037377931","https://openalex.org/W3174770825","https://openalex.org/W3176828726","https://openalex.org/W3211848727","https://openalex.org/W4224311344","https://openalex.org/W4249128574","https://openalex.org/W4367046738","https://openalex.org/W4367046890","https://openalex.org/W4378697396","https://openalex.org/W4396758529"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Fine-tuning":[0],"of":[1,60,72,85,98,110,145,170,192],"pre-trained":[2],"large":[3,86,186],"language":[4,87,187],"models":[5,36,65,188],"has":[6],"become":[7],"increasingly":[8],"popular,":[9],"yet":[10,202],"existing":[11],"fine-tuning":[12,64,81,94,125,147],"methods":[13],"are":[14,37],"typically":[15],"centralized,":[16],"requiring":[17],"users":[18,44],"to":[19,23,29,46,70,79,128,151,200],"send":[20],"local":[21,69],"data":[22,34],"centralized":[24],"servers,":[25],"or":[26],"model":[27,156],"owners":[28],"open-source":[30],"their":[31,93,96],"models.":[32],"However,":[33],"and":[35,43,56,136,149],"valuable":[38],"assets":[39],"that":[40],"few":[41],"enterprises":[42],"wish":[45],"share.":[47],"In":[48],"this":[49,116],"paper,":[50],"we":[51,118],"deviate":[52],"from":[53],"conventional":[54],"wisdom":[55],"advocate":[57],"the":[58,73,83,108,121,130,142,154,167,190],"use":[59,97],"split":[61,80,124,171],"learning":[62],"for":[63],"with":[66],"private":[67],"data,":[68],"each":[71],"clients.":[74,160],"The":[75],"most":[76],"formidable":[77],"challenge":[78],"is":[82],"size":[84],"models:":[88],"when":[89],"multiple":[90,159],"clients":[91,111],"start":[92],"tasks,":[95],"GPU":[99,132,177,195],"memory":[100,178,196],"will":[101],"overwhelm":[102],"a":[103],"GPU-equipped":[104],"server,":[105],"especially":[106],"as":[107],"number":[109],"scales":[112],"up.":[113],"To":[114],"address":[115],"challenge,":[117],"present":[119],"Menos,":[120,193],"first":[122],"memory-efficient":[123],"framework":[126],"designed":[127],"optimize":[129],"server":[131],"footprint":[133],"through":[134],"spatial":[135],"temporal":[137],"sharing.":[138],"Specifically,":[139],"Menos":[140],"utilizes":[141],"adapter-based":[143],"nature":[144],"modern":[146],"techniques,":[148],"proposes":[150],"spatially":[152],"share":[153],"base":[155],"parameters":[157],"among":[158],"It":[161],"also":[162],"schedules":[163],"memory-intensive":[164],"operations":[165],"during":[166],"communication":[168],"gaps":[169],"learning,":[172],"thereby":[173],"temporally":[174],"sharing":[175],"limited":[176],"at":[179],"runtime.":[180],"Comprehensive":[181],"real-world":[182],"evaluations":[183],"using":[184],"state-of-the-art":[185],"demonstrate":[189],"effectiveness":[191],"reducing":[194],"consumption":[197],"by":[198],"up":[199],"72%,":[201],"incurring":[203],"negligible":[204],"overhead.":[205]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-12-27T23:08:20.325037","created_date":"2025-10-10T00:00:00"}
