{"id":"https://openalex.org/W7164149330","doi":"https://doi.org/10.1109/fccm68464.2026.00062","title":"An Efficient Dataflow Framework for DiT-Based Image Generation","display_name":"An Efficient Dataflow Framework for DiT-Based Image Generation","publication_year":2026,"publication_date":"2026-05-13","ids":{"openalex":"https://openalex.org/W7164149330","doi":"https://doi.org/10.1109/fccm68464.2026.00062"},"language":null,"primary_location":{"id":"doi:10.1109/fccm68464.2026.00062","is_oa":false,"landing_page_url":"https://doi.org/10.1109/fccm68464.2026.00062","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 IEEE 34th Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5084010732","display_name":"Y Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I8078737","display_name":"Clemson University","ror":"https://ror.org/037s24f05","country_code":"US","type":"education","lineage":["https://openalex.org/I8078737"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yazhe Zhang","raw_affiliation_strings":["Clemson University,Electrical and Computer Engineering,Clemson,South Carolina,US"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Clemson University,Electrical and Computer Engineering,Clemson,South Carolina,US","institution_ids":["https://openalex.org/I8078737"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025040115","display_name":"Shouyu Du","orcid":null},"institutions":[{"id":"https://openalex.org/I8078737","display_name":"Clemson University","ror":"https://ror.org/037s24f05","country_code":"US","type":"education","lineage":["https://openalex.org/I8078737"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shouyu Du","raw_affiliation_strings":["Clemson University,Electrical and Computer Engineering,Clemson,South Carolina,US"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Clemson University,Electrical and Computer Engineering,Clemson,South Carolina,US","institution_ids":["https://openalex.org/I8078737"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101545996","display_name":"Zhenyu Xu","orcid":"https://orcid.org/0000-0001-8649-8062"},"institutions":[{"id":"https://openalex.org/I8078737","display_name":"Clemson University","ror":"https://ror.org/037s24f05","country_code":"US","type":"education","lineage":["https://openalex.org/I8078737"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhenyu Xu","raw_affiliation_strings":["Clemson University,Electrical and Computer Engineering,Clemson,South Carolina,US"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Clemson University,Electrical and Computer Engineering,Clemson,South Carolina,US","institution_ids":["https://openalex.org/I8078737"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013631588","display_name":"Miaoxiang Yu","orcid":"https://orcid.org/0000-0002-4382-9009"},"institutions":[{"id":"https://openalex.org/I8078737","display_name":"Clemson University","ror":"https://ror.org/037s24f05","country_code":"US","type":"education","lineage":["https://openalex.org/I8078737"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Miaoxiang Yu","raw_affiliation_strings":["Clemson University,Electrical and Computer Engineering,Clemson,South Carolina,US"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Clemson University,Electrical and Computer Engineering,Clemson,South Carolina,US","institution_ids":["https://openalex.org/I8078737"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043424584","display_name":"Dingjiang Yan","orcid":"https://orcid.org/0000-0001-5935-1700"},"institutions":[{"id":"https://openalex.org/I8078737","display_name":"Clemson University","ror":"https://ror.org/037s24f05","country_code":"US","type":"education","lineage":["https://openalex.org/I8078737"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dingjiang Yan","raw_affiliation_strings":["Clemson University,Electrical and Computer Engineering,Clemson,South Carolina,US"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Clemson University,Electrical and Computer Engineering,Clemson,South Carolina,US","institution_ids":["https://openalex.org/I8078737"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5138299242","display_name":"Zhiheng Ni","orcid":null},"institutions":[{"id":"https://openalex.org/I8078737","display_name":"Clemson University","ror":"https://ror.org/037s24f05","country_code":"US","type":"education","lineage":["https://openalex.org/I8078737"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhiheng Ni","raw_affiliation_strings":["Clemson University,Electrical and Computer Engineering,Clemson,South Carolina,US"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Clemson University,Electrical and Computer Engineering,Clemson,South Carolina,US","institution_ids":["https://openalex.org/I8078737"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5138348525","display_name":"Qing Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I17626003","display_name":"University of Rhode Island","ror":"https://ror.org/013ckk937","country_code":"US","type":"education","lineage":["https://openalex.org/I17626003"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Qing Yang","raw_affiliation_strings":["University of Rhode Island,Electrical and Computer Engineering,Kingston,Rhode Island,US"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Rhode Island,Electrical and Computer Engineering,Kingston,Rhode Island,US","institution_ids":["https://openalex.org/I17626003"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5138287797","display_name":"Tao Wei","orcid":null},"institutions":[{"id":"https://openalex.org/I8078737","display_name":"Clemson University","ror":"https://ror.org/037s24f05","country_code":"US","type":"education","lineage":["https://openalex.org/I8078737"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tao Wei","raw_affiliation_strings":["Clemson University,Electrical and Computer Engineering,Clemson,South Carolina,US"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Clemson University,Electrical and Computer Engineering,Clemson,South Carolina,US","institution_ids":["https://openalex.org/I8078737"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.96107917,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"274","last_page":"274"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10481","display_name":"Computer Graphics and Visualization Techniques","score":0.32739999890327454,"subfield":{"id":"https://openalex.org/subfields/1704","display_name":"Computer Graphics and Computer-Aided Design"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10481","display_name":"Computer Graphics and Visualization Techniques","score":0.32739999890327454,"subfield":{"id":"https://openalex.org/subfields/1704","display_name":"Computer Graphics and Computer-Aided Design"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11019","display_name":"Image Enhancement Techniques","score":0.06939999759197235,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.051500000059604645,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.484499990940094},{"id":"https://openalex.org/keywords/image-processing","display_name":"Image processing","score":0.41339999437332153},{"id":"https://openalex.org/keywords/dataflow","display_name":"Dataflow","score":0.3271999955177307},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.2856999933719635},{"id":"https://openalex.org/keywords/image-segmentation","display_name":"Image segmentation","score":0.27459999918937683}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6456000208854675},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.484499990940094},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.44179999828338623},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.420199990272522},{"id":"https://openalex.org/C9417928","wikidata":"https://www.wikidata.org/wiki/Q1070689","display_name":"Image processing","level":3,"score":0.41339999437332153},{"id":"https://openalex.org/C96324660","wikidata":"https://www.wikidata.org/wiki/Q205446","display_name":"Dataflow","level":2,"score":0.3271999955177307},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.29989999532699585},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.2856999933719635},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.28529998660087585},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.27459999918937683},{"id":"https://openalex.org/C2987933465","wikidata":"https://www.wikidata.org/wiki/Q141130","display_name":"Image manipulation","level":3,"score":0.2572999894618988},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.25429999828338623},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.2531000077724457}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/fccm68464.2026.00062","is_oa":false,"landing_page_url":"https://doi.org/10.1109/fccm68464.2026.00062","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 IEEE 34th Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy","score":0.42250436544418335}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":1,"referenced_works":["https://openalex.org/W4400489024"],"related_works":[],"abstract_inverted_index":{"We":[0,40,88],"present":[1],"a":[2,52,123,143,194,275],"general":[3],"and":[4,24,35,73,93,104,118,128,162,185,189,254,278,298,322],"efficient":[5,220],"dataflow":[6,56,284,300],"framework":[7,19,177,285],"for":[8,268,313],"mapping":[9],"Diffusion":[10],"Transformer":[11],"(DiT)-based":[12],"models":[13,297],"onto":[14],"tiled":[15],"mesh":[16],"accelerators.":[17,301],"The":[18,138,319],"explicitly":[20],"orchestrates":[21],"tiling,":[22],"streaming,":[23],"Direct":[25],"Memory":[26],"Access":[27],"(DMA)-driven":[28],"data":[29],"movement":[30],"to":[31,152,171,294],"efficiently":[32],"execute":[33],"attention":[34],"large":[36],"matrix":[37],"multiplication":[38],"(MM).":[39],"use":[41],"the":[42,61,67,70,80,97,102,108,114,119,176,204,208,224,230,237,243,250,282,305],"AMD":[43],"Ryzen":[44],"AI":[45],"Neural":[46],"Processing":[47],"Unit":[48],"(NPU)":[49],"[1]":[50],"as":[51,142,153,310],"representative":[53,180,280],"edge-class":[54],"tiled-mesh":[55],"platform.":[57],"In":[58],"our":[59,91],"implementation,":[60],"DiT":[62],"denoising":[63,83,98,111,258],"stage":[64],"runs":[65],"on":[66,79,96,107,178,274],"NPU,":[68],"while":[69,100,214,229,242],"text":[71],"encoder":[72],"Variational":[74],"Autoencoder":[75],"(VAE)":[76],"decoder":[77],"remain":[78],"CPU.The":[81],"iterative":[82],"loop":[84],"dominates":[85],"end-to-end":[86,191],"latency.":[87,259],"therefore":[89],"focus":[90],"acceleration":[92],"optimization":[94],"efforts":[95],"module,":[99],"keeping":[101],"text-embedding":[103],"VAE-decoder":[105],"stages":[106],"CPU.":[109],"Each":[110],"step":[112,124],"reuses":[113],"same":[115],"transformer":[116],"backbone,":[117],"dominant":[120],"operations":[121],"within":[122],"are":[125,325],"MM,":[126],"self-attention,":[127],"MLP":[129],"submodules":[130],"that":[131,249],"combine":[132],"MM":[133],"with":[134,203],"elementwise":[135],"nonlinear":[136],"operations.":[137],"NPU":[139,209,231,244,251,277],"is":[140,286],"organized":[141],"two-dimensional":[144],"array":[145],"of":[146,193,307],"compute":[147],"tiles":[148,161],"(CTs),":[149],"also":[150],"referred":[151],"AIEs.":[154],"Our":[155,302],"design":[156],"partitions":[157],"input":[158],"matrices":[159],"into":[160,263],"maps":[163],"independent":[164],"tile":[165],"computations":[166],"across":[167],"multiple":[168],"AIE":[169],"cores":[170],"exploit":[172],"spatial":[173,308],"parallelism.We":[174],"evaluate":[175],"two":[179,279],"DiT-based":[181],"text-to-image":[182],"models,":[183,281],"FLUX.1-schnell":[184],"Z-Image-Turbo":[186],"[2],":[187],"[3],":[188],"achieve":[190],"generation":[192,212],"high-quality":[195],"image":[196,270],"in":[197],"just":[198],"over":[199],"one":[200],"minute.":[201],"Compared":[202],"integrated":[205],"GPU":[206],"(iGPU),":[207],"delivers":[210],"comparable":[211],"latency":[213],"being":[215],"approximately":[216],"4.4\u00d7":[217],"more":[218],"power":[219],"(package).":[221],"For":[222,235],"FLUX.1-schnell,":[223],"iGPU":[225,238,257],"achieves":[226,232,239,245],"10.5":[227],"s/step,":[228,241,247],"13.3":[233],"s/step.":[234],"Z-Image-Turbo,":[236],"10.1":[240],"9.8":[246],"showing":[248],"can":[252],"match":[253],"slightly":[255],"surpass":[256],"These":[260],"results":[261,303],"translate":[262],"substantially":[264],"improved":[265],"energy":[266],"efficiency":[267],"on-device":[269,315],"generation.":[271],"Although":[272],"demonstrated":[273],"specific":[276],"proposed":[283],"neither":[287],"hardware-specific":[288],"nor":[289],"model-specific.":[290],"It":[291],"generalizes":[292],"naturally":[293],"other":[295],"diffusion-family":[296],"mesh-based":[299],"highlight":[304],"potential":[306],"NPUs":[309],"energy-efficient":[311],"platforms":[312],"next-generation":[314],"generative":[316],"AI.Code":[317],"Availability:":[318],"source":[320],"code":[321],"setup":[323],"instructions":[324],"available":[326],"at:":[327],"https://github.com/jia1217/vigenflow.":[328]},"counts_by_year":[],"updated_date":"2026-06-12T06:20:11.936012","created_date":"2026-06-11T00:00:00"}
