{"id":"https://openalex.org/W4399455124","doi":"https://doi.org/10.48550/arxiv.2406.04112","title":"Compressible Dynamics in Deep Overparameterized Low-Rank Learning &amp; Adaptation","display_name":"Compressible Dynamics in Deep Overparameterized Low-Rank Learning &amp; Adaptation","publication_year":2024,"publication_date":"2024-06-06","ids":{"openalex":"https://openalex.org/W4399455124","doi":"https://doi.org/10.48550/arxiv.2406.04112"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2406.04112","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2406.04112","pdf_url":"https://arxiv.org/pdf/2406.04112","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2406.04112","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5088138032","display_name":"Can Yaras","orcid":"https://orcid.org/0000-0003-4306-3901"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Yaras, Can","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100396056","display_name":"Peng Wang","orcid":"https://orcid.org/0000-0002-6799-0745"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Peng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029521003","display_name":"Laura Balzano","orcid":"https://orcid.org/0000-0003-2914-123X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Balzano, Laura","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5019924950","display_name":"Qing Qu","orcid":"https://orcid.org/0000-0001-9136-558X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qu, Qing","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5088138032"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11206","display_name":"Model Reduction and Neural Networks","score":0.8885999917984009,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11206","display_name":"Model Reduction and Neural Networks","score":0.8885999917984009,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.8733000159263611,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.8307999968528748,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/dynamics","display_name":"Dynamics (music)","score":0.7145556807518005},{"id":"https://openalex.org/keywords/rank","display_name":"Rank (graph theory)","score":0.652522087097168},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.6245207786560059},{"id":"https://openalex.org/keywords/compressibility","display_name":"Compressibility","score":0.5827266573905945},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.48292896151542664},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3339780569076538},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.29491472244262695},{"id":"https://openalex.org/keywords/aerospace-engineering","display_name":"Aerospace engineering","score":0.18965458869934082},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.15104249119758606},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.1488610804080963},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.11667582392692566},{"id":"https://openalex.org/keywords/optics","display_name":"Optics","score":0.06194224953651428},{"id":"https://openalex.org/keywords/combinatorics","display_name":"Combinatorics","score":0.05720239877700806}],"concepts":[{"id":"https://openalex.org/C145912823","wikidata":"https://www.wikidata.org/wiki/Q113558","display_name":"Dynamics (music)","level":2,"score":0.7145556807518005},{"id":"https://openalex.org/C164226766","wikidata":"https://www.wikidata.org/wiki/Q7293202","display_name":"Rank (graph theory)","level":2,"score":0.652522087097168},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.6245207786560059},{"id":"https://openalex.org/C84655787","wikidata":"https://www.wikidata.org/wiki/Q8067817","display_name":"Compressibility","level":2,"score":0.5827266573905945},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.48292896151542664},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3339780569076538},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.29491472244262695},{"id":"https://openalex.org/C146978453","wikidata":"https://www.wikidata.org/wiki/Q3798668","display_name":"Aerospace engineering","level":1,"score":0.18965458869934082},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.15104249119758606},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.1488610804080963},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.11667582392692566},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.06194224953651428},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.05720239877700806}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2406.04112","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2406.04112","pdf_url":"https://arxiv.org/pdf/2406.04112","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},{"id":"doi:10.48550/arxiv.2406.04112","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2406.04112","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2406.04112","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2406.04112","pdf_url":"https://arxiv.org/pdf/2406.04112","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2012983134","https://openalex.org/W4388599312","https://openalex.org/W2090218458","https://openalex.org/W2053355289","https://openalex.org/W2368811332","https://openalex.org/W4256106877","https://openalex.org/W4205276374","https://openalex.org/W2390251251","https://openalex.org/W1972617237","https://openalex.org/W2997567050"],"abstract_inverted_index":{"While":[0],"overparameterization":[1,53],"in":[2,9,82],"machine":[3],"learning":[4,96],"models":[5],"offers":[6],"great":[7],"benefits":[8,51,122],"terms":[10],"of":[11,38,52,64,98,130,144,182],"optimization":[12],"and":[13,40,113,169],"generalization,":[14],"it":[15],"also":[16],"leads":[17],"to":[18,104,166],"increased":[19],"computational":[20,56],"requirements":[21],"as":[22,72,74,123],"model":[23,45,148],"sizes":[24],"grow.":[25],"In":[26,58,127],"this":[27,65],"work,":[28],"we":[29,47,60,92,110,150],"show":[30,93],"that":[31,94],"by":[32],"leveraging":[33],"the":[34,44,50,55,62,95,120,128,142,159,180],"inherent":[35],"low-dimensional":[36,107],"structures":[37],"data":[39],"compressible":[41],"dynamics":[42,97],"within":[43],"parameters,":[46],"can":[48,111],"reap":[49],"without":[54],"burdens.":[57],"practice,":[59],"demonstrate":[61],"effectiveness":[63,181],"approach":[66,79],"for":[67,85],"deep":[68,86,131],"low-rank":[69,88,161],"matrix":[70,89,101,132],"completion":[71],"well":[73],"fine-tuning":[75,191],"language":[76,147,187],"models.":[77],"Our":[78,195],"is":[80,197],"grounded":[81],"theoretical":[83],"findings":[84],"overparameterized":[87,125],"recovery,":[90],"where":[91],"each":[99],"weight":[100],"are":[102],"confined":[103],"an":[105],"invariant":[106],"subspace.":[108],"Consequently,":[109],"construct":[112],"train":[114],"compact,":[115],"highly":[116],"compressed":[117],"factorizations":[118],"possessing":[119],"same":[121],"their":[124],"counterparts.":[126],"context":[129],"completion,":[133],"our":[134],"technique":[135],"substantially":[136],"improves":[137,158],"training":[138],"efficiency":[139],"while":[140,174],"retaining":[141],"advantages":[143],"overparameterization.":[145],"For":[146],"fine-tuning,":[149],"propose":[151],"a":[152,170],"method":[153],"called":[154],"\"Deep":[155],"LoRA\",":[156],"which":[157],"existing":[160],"adaptation":[162],"(LoRA)":[163],"technique,":[164],"leading":[165],"reduced":[167],"overfitting":[168],"simplified":[171],"hyperparameter":[172],"setup,":[173],"maintaining":[175],"comparable":[176],"efficiency.":[177],"We":[178],"validate":[179],"Deep":[183],"LoRA":[184],"on":[185],"natural":[186],"tasks,":[188],"particularly":[189],"when":[190],"with":[192],"limited":[193],"data.":[194],"code":[196],"available":[198],"at":[199],"https://github.com/cjyaras/deep-lora-transformers.":[200]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-02-17T06:05:46.635709","created_date":"2025-10-10T00:00:00"}
