{"id":"https://openalex.org/W4405032530","doi":"https://doi.org/10.48550/arxiv.2412.00071","title":"COAP: Memory-Efficient Training with Correlation-Aware Gradient Projection","display_name":"COAP: Memory-Efficient Training with Correlation-Aware Gradient Projection","publication_year":2024,"publication_date":"2024-11-26","ids":{"openalex":"https://openalex.org/W4405032530","doi":"https://doi.org/10.48550/arxiv.2412.00071"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2412.00071","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2412.00071","pdf_url":"https://arxiv.org/pdf/2412.00071","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2412.00071","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5068769617","display_name":"Jinqi Xiao","orcid":"https://orcid.org/0009-0004-7311-9413"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Xiao, Jinqi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111111953","display_name":"Shen Sang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sang, Shen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080638933","display_name":"Tiancheng Zhi","orcid":"https://orcid.org/0000-0002-0953-1444"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhi, Tiancheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100375136","display_name":"Jing Liu","orcid":"https://orcid.org/0000-0003-4690-1886"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Jing","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110071835","display_name":"Qing Yan","orcid":"https://orcid.org/0000-0003-2321-4207"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yan, Qing","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Zhang, Yuqian","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Yuqian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081407963","display_name":"Linjie Luo","orcid":"https://orcid.org/0000-0001-6322-1175"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Luo, Linjie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5032061647","display_name":"Bo Yuan","orcid":"https://orcid.org/0000-0003-2169-0007"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuan, Bo","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5068769617"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9829000234603882,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9829000234603882,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12702","display_name":"Brain Tumor Detection and Classification","score":0.972100019454956,"subfield":{"id":"https://openalex.org/subfields/2808","display_name":"Neurology"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9707000255584717,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.6601107120513916},{"id":"https://openalex.org/keywords/projection","display_name":"Projection (relational algebra)","score":0.6467304229736328},{"id":"https://openalex.org/keywords/correlation","display_name":"Correlation","score":0.5921602249145508},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5763186812400818},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5030557513237},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.21039357781410217},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.19273677468299866},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.15523073077201843},{"id":"https://openalex.org/keywords/geometry","display_name":"Geometry","score":0.07375836372375488},{"id":"https://openalex.org/keywords/meteorology","display_name":"Meteorology","score":0.06161502003669739}],"concepts":[{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.6601107120513916},{"id":"https://openalex.org/C57493831","wikidata":"https://www.wikidata.org/wiki/Q3134666","display_name":"Projection (relational algebra)","level":2,"score":0.6467304229736328},{"id":"https://openalex.org/C117220453","wikidata":"https://www.wikidata.org/wiki/Q5172842","display_name":"Correlation","level":2,"score":0.5921602249145508},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5763186812400818},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5030557513237},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.21039357781410217},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.19273677468299866},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.15523073077201843},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.07375836372375488},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.06161502003669739}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2412.00071","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2412.00071","pdf_url":"https://arxiv.org/pdf/2412.00071","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2412.00071","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2412.00071","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2412.00071","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2412.00071","pdf_url":"https://arxiv.org/pdf/2412.00071","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W230091440","https://openalex.org/W2390279801","https://openalex.org/W2233261550","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2810751659"],"abstract_inverted_index":{"Training":[0],"large-scale":[1],"neural":[2],"networks":[3],"in":[4,121],"vision,":[5,112],"and":[6,56,80,114,125,157],"multimodal":[7,115],"domains":[8],"demands":[9],"substantial":[10],"memory":[11,28,52,133,154],"resources,":[12],"primarily":[13],"due":[14,36],"to":[15,37,72],"the":[16,38,143],"storage":[17],"of":[18,40],"optimizer":[19,51,132,153],"states.":[20],"While":[21],"LoRA,":[22],"a":[23,98],"popular":[24],"parameter-efficient":[25],"method,":[26],"reduces":[27,131],"usage,":[29],"it":[30,130],"often":[31,84],"suffers":[32],"from":[33],"suboptimal":[34],"performance":[35,78],"constraints":[39],"low-rank":[41,60],"updates.":[42],"Low-rank":[43],"gradient":[44],"projection":[45,82],"methods":[46,120],"(e.g.,":[47],"GaLore,":[48],"Flora)":[49],"reduce":[50],"by":[53,134,155],"projecting":[54],"gradients":[55],"moment":[57],"estimates":[58],"into":[59],"spaces":[61],"via":[62],"singular":[63],"value":[64],"decomposition":[65],"or":[66],"random":[67],"projection.":[68],"However,":[69],"they":[70],"fail":[71],"account":[73],"for":[74,163],"inter-projection":[75],"correlation,":[76],"causing":[77],"degradation,":[79],"their":[81],"strategies":[83],"incur":[85],"high":[86],"computational":[87,103],"costs.":[88],"In":[89],"this":[90],"paper,":[91],"we":[92],"present":[93],"COAP":[94,117,151],"(Correlation-Aware":[95],"Gradient":[96],"Projection),":[97],"memory-efficient":[99],"method":[100],"that":[101],"minimizes":[102],"overhead":[104],"while":[105,166],"maintaining":[106],"training":[107,123],"performance.":[108,127],"Evaluated":[109],"across":[110],"various":[111],"language,":[113],"tasks,":[116],"outperforms":[118],"existing":[119],"both":[122],"speed":[124],"model":[126],"For":[128],"LLaMA-1B,":[129],"61%":[135],"with":[136],"only":[137],"2%":[138],"additional":[139],"time":[140],"cost,":[141],"achieving":[142],"same":[144],"PPL":[145],"as":[146],"AdamW.":[147],"With":[148],"8-bit":[149],"quantization,":[150],"cuts":[152],"81%":[156],"achieves":[158],"4x":[159],"speedup":[160],"over":[161],"GaLore":[162],"LLaVA-v1.5-7B":[164],"fine-tuning,":[165],"delivering":[167],"higher":[168],"accuracy.":[169]},"counts_by_year":[],"updated_date":"2026-03-25T23:56:10.502304","created_date":"2025-10-10T00:00:00"}
