{"id":"https://openalex.org/W7147035318","doi":"https://doi.org/10.48550/arxiv.2603.28405","title":"EdgeDiT: Hardware-Aware Diffusion Transformers for Efficient On-Device Image Generation","display_name":"EdgeDiT: Hardware-Aware Diffusion Transformers for Efficient On-Device Image Generation","publication_year":2026,"publication_date":"2026-03-30","ids":{"openalex":"https://openalex.org/W7147035318","doi":"https://doi.org/10.48550/arxiv.2603.28405"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.28405","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.28405","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.28405","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5116594543","display_name":"Sravanth Kodavanti","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Kodavanti, Sravanth","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132713670","display_name":"Manjunath Arveti","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Arveti, Manjunath","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5116594542","display_name":"Sowmya Vajrala","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Vajrala, Sowmya","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057645083","display_name":"Srinivas Soumitri Miriyala","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Miriyala, Srinivas","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5132608790","display_name":"Vikram N R","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"R, Vikram N","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5116594543"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.37549999356269836,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.37549999356269836,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.15559999644756317,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.11760000139474869,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.6273000240325928},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6074000000953674},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5248000025749207},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.47909998893737793},{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.45829999446868896},{"id":"https://openalex.org/keywords/mobile-device","display_name":"Mobile device","score":0.37940001487731934},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.3619999885559082}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6809999942779541},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.6273000240325928},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6074000000953674},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5248000025749207},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.47909998893737793},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.46369999647140503},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.45829999446868896},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4129999876022339},{"id":"https://openalex.org/C186967261","wikidata":"https://www.wikidata.org/wiki/Q5082128","display_name":"Mobile device","level":2,"score":0.37940001487731934},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.3619999885559082},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.34459999203681946},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.3398999869823456},{"id":"https://openalex.org/C9417928","wikidata":"https://www.wikidata.org/wiki/Q1070689","display_name":"Image processing","level":3,"score":0.33550000190734863},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.3319999873638153},{"id":"https://openalex.org/C2781357197","wikidata":"https://www.wikidata.org/wiki/Q5757597","display_name":"High memory","level":2,"score":0.32249999046325684},{"id":"https://openalex.org/C138236772","wikidata":"https://www.wikidata.org/wiki/Q25098575","display_name":"Edge device","level":3,"score":0.30300000309944153},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.289900004863739},{"id":"https://openalex.org/C24326235","wikidata":"https://www.wikidata.org/wiki/Q126095","display_name":"Electronic engineering","level":1,"score":0.2856000065803528},{"id":"https://openalex.org/C179799912","wikidata":"https://www.wikidata.org/wiki/Q205084","display_name":"Computational complexity theory","level":2,"score":0.2581000030040741}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.28405","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.28405","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.28405","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.28405","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Diffusion":[0],"Transformers":[1],"(DiT)":[2],"have":[3],"established":[4],"a":[5,33,59,84,91,96,102,128,162],"new":[6],"state-of-the-art":[7],"in":[8,94,99,105],"high-fidelity":[9],"image":[10],"synthesis;":[11],"however,":[12],"their":[13],"massive":[14],"computational":[15],"complexity":[16],"and":[17,52,66,101,137,146,154],"memory":[18],"requirements":[19],"hinder":[20],"local":[21],"deployment":[22],"on":[23],"resource-constrained":[24],"edge":[25],"devices.":[26],"In":[27],"this":[28],"paper,":[29],"we":[30,63],"introduce":[31],"EdgeDiT,":[32],"family":[34],"of":[35,86,117,176],"hardware-efficient":[36],"generative":[37,156],"transformers":[38],"specifically":[39],"engineered":[40],"for":[41,78,165],"mobile":[42,79,144],"Neural":[43,54],"Processing":[44],"Units":[45],"(NPUs),":[46],"such":[47],"as":[48],"the":[49,71,110,114,118,174,177],"Qualcomm":[50],"Hexagon":[51],"Apple":[53],"Engine":[55],"(ANE).":[56],"By":[57,150],"leveraging":[58],"hardware-aware":[60],"optimization":[61],"framework,":[62],"systematically":[64],"identify":[65],"prune":[67],"structural":[68],"redundancies":[69],"within":[70],"DiT":[72,148],"backbone":[73],"that":[74,89,125],"are":[75],"particularly":[76],"taxing":[77],"data-flows.":[80],"Our":[81],"approach":[82],"yields":[83],"series":[85],"lightweight":[87],"models":[88,169],"achieve":[90],"20-30%":[92],"reduction":[93,104],"parameters,":[95],"36-46%":[97],"decrease":[98],"FLOPs,":[100],"1.65-fold":[103],"on-device":[106],"latency":[107,139],"without":[108],"sacrificing":[109],"scaling":[111],"advantages":[112],"or":[113],"expressive":[115],"capacity":[116],"original":[119],"transformer":[120],"architecture.":[121],"Extensive":[122],"benchmarking":[123],"demonstrates":[124],"EdgeDiT":[126,160],"offers":[127],"superior":[129],"Pareto-optimal":[130],"trade-off":[131],"between":[132],"Frechet":[133],"Inception":[134],"Distance":[135],"(FID)":[136],"inference":[138],"compared":[140],"to":[141,173],"both":[142],"optimized":[143],"U-Nets":[145],"vanilla":[147],"variants.":[149],"enabling":[151],"responsive,":[152],"private,":[153],"offline":[155],"AI":[157],"directly":[158],"on-device,":[159],"provides":[161],"scalable":[163],"blueprint":[164],"transitioning":[166],"large-scale":[167],"foundation":[168],"from":[170],"high-end":[171],"GPUs":[172],"palm":[175],"user.":[178]},"counts_by_year":[],"updated_date":"2026-04-02T13:53:19.096889","created_date":"2026-04-02T00:00:00"}
