{"id":"https://openalex.org/W7140172009","doi":"https://doi.org/10.48550/arxiv.2603.20755","title":"Memory-Efficient Fine-Tuning Diffusion Transformers via Dynamic Patch Sampling and Block Skipping","display_name":"Memory-Efficient Fine-Tuning Diffusion Transformers via Dynamic Patch Sampling and Block Skipping","publication_year":2026,"publication_date":"2026-03-21","ids":{"openalex":"https://openalex.org/W7140172009","doi":"https://doi.org/10.48550/arxiv.2603.20755"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.20755","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.20755","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.20755","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Park, Sunghyun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Park, Sunghyun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Kim, Jeongho","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kim, Jeongho","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Park, Hyoungwoo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Park, Hyoungwoo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Das, Debasmit","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Das, Debasmit","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Yun, Sungrack","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yun, Sungrack","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Hayat, Munawar","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hayat, Munawar","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Choo, Jaegul","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Choo, Jaegul","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Porikli, Fatih","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Porikli, Fatih","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Choi, Seokeon","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Choi, Seokeon","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.27950000762939453,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.27950000762939453,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10481","display_name":"Computer Graphics and Visualization Techniques","score":0.10369999706745148,"subfield":{"id":"https://openalex.org/subfields/1704","display_name":"Computer Graphics and Computer-Aided Design"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11019","display_name":"Image Enhancement Techniques","score":0.044599998742341995,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/residual","display_name":"Residual","score":0.7311000227928162},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.5174999833106995},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.3984000086784363},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.3912000060081482},{"id":"https://openalex.org/keywords/limiting","display_name":"Limiting","score":0.3896999955177307},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.36149999499320984}],"concepts":[{"id":"https://openalex.org/C155512373","wikidata":"https://www.wikidata.org/wiki/Q287450","display_name":"Residual","level":2,"score":0.7311000227928162},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7080000042915344},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.5174999833106995},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.3984000086784363},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.3912000060081482},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.3896999955177307},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.36149999499320984},{"id":"https://openalex.org/C183003079","wikidata":"https://www.wikidata.org/wiki/Q1000371","display_name":"Personalization","level":2,"score":0.3427000045776367},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.32919999957084656},{"id":"https://openalex.org/C24326235","wikidata":"https://www.wikidata.org/wiki/Q126095","display_name":"Electronic engineering","level":1,"score":0.321399986743927},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.31299999356269836},{"id":"https://openalex.org/C44154836","wikidata":"https://www.wikidata.org/wiki/Q45045","display_name":"Simulation","level":1,"score":0.3037000000476837},{"id":"https://openalex.org/C37404715","wikidata":"https://www.wikidata.org/wiki/Q380679","display_name":"Dynamic programming","level":2,"score":0.28519999980926514},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.2818000018596649},{"id":"https://openalex.org/C69357855","wikidata":"https://www.wikidata.org/wiki/Q163214","display_name":"Diffusion","level":2,"score":0.2750000059604645},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.27160000801086426},{"id":"https://openalex.org/C179799912","wikidata":"https://www.wikidata.org/wiki/Q205084","display_name":"Computational complexity theory","level":2,"score":0.26429998874664307},{"id":"https://openalex.org/C512564126","wikidata":"https://www.wikidata.org/wiki/Q7257959","display_name":"Public records","level":2,"score":0.257099986076355}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.20755","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.20755","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.20755","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.20755","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Diffusion":[0],"Transformers":[1],"(DiTs)":[2],"have":[3],"significantly":[4,120],"enhanced":[5],"text-to-image":[6],"(T2I)":[7],"generation":[8],"quality,":[9],"enabling":[10],"high-quality":[11],"personalized":[12],"content":[13],"creation.":[14],"However,":[15],"fine-tuning":[16,39],"these":[17,33],"models":[18],"requires":[19],"substantial":[20],"computational":[21],"complexity":[22],"and":[23,48,97,112,150],"memory,":[24],"limiting":[25],"practical":[26],"deployment":[27],"under":[28],"resource":[29],"constraints.":[30],"To":[31,124],"tackle":[32],"challenges,":[34],"we":[35,130],"propose":[36],"a":[37,74,132],"memory-efficient":[38],"framework":[40],"called":[41],"DiT-BlockSkip,":[42],"integrating":[43],"timestep-aware":[44],"dynamic":[45,56],"patch":[46,57,61],"sampling":[47,58],"block":[49,104,133],"skipping":[50,105],"by":[51],"precomputing":[52],"residual":[53,114],"features.":[54],"Our":[55],"strategy":[59,135],"adjusts":[60],"sizes":[62],"based":[63,136],"on":[64,137],"the":[65,70,88,117],"diffusion":[66,167],"timestep,":[67],"then":[68],"resizes":[69],"cropped":[71],"patches":[72],"to":[73,90],"fixed":[75],"lower":[76,101],"resolution.":[77],"This":[78],"approach":[79,144],"reduces":[80],"forward":[81],"&amp;":[82],"backward":[83],"memory":[84,154],"usage":[85,155],"while":[86,152],"allowing":[87],"model":[89],"capture":[91],"global":[92],"structures":[93],"at":[94,100],"higher":[95],"timesteps":[96],"fine-grained":[98],"details":[99],"timesteps.":[102],"The":[103],"mechanism":[106],"selectively":[107],"fine-tunes":[108],"essential":[109],"transformer":[110],"blocks":[111,127],"precomputes":[113],"features":[115],"for":[116,128,165],"skipped":[118],"blocks,":[119],"reducing":[121,153],"training":[122],"memory.":[123],"identify":[125],"vital":[126],"personalization,":[129],"introduce":[131],"selection":[134],"cross-attention":[138],"masking.":[139],"Evaluations":[140],"demonstrate":[141],"that":[142],"our":[143],"achieves":[145],"competitive":[146],"personalization":[147],"performance":[148],"qualitatively":[149],"quantitatively,":[151],"substantially,":[156],"moving":[157],"toward":[158],"on-device":[159],"feasibility":[160],"(e.g.,":[161],"smartphones,":[162],"IoT":[163],"devices)":[164],"large-scale":[166],"transformers.":[168]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-25T00:00:00"}
