{"id":"https://openalex.org/W4416429512","doi":"https://doi.org/10.1109/iccad66269.2025.11240863","title":"DynVec: An End-to-End Framework for Efficient Vector-Dataflow Execution","display_name":"DynVec: An End-to-End Framework for Efficient Vector-Dataflow Execution","publication_year":2025,"publication_date":"2025-10-26","ids":{"openalex":"https://openalex.org/W4416429512","doi":"https://doi.org/10.1109/iccad66269.2025.11240863"},"language":null,"primary_location":{"id":"doi:10.1109/iccad66269.2025.11240863","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccad66269.2025.11240863","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/ACM International Conference On Computer Aided Design (ICCAD)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5042297301","display_name":"Jiangnan Li","orcid":"https://orcid.org/0009-0000-8726-4292"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jiangnan Li","raw_affiliation_strings":["Fudan University,State Key Laboratory of Integrated Chips and Systems,Shanghai,China"],"affiliations":[{"raw_affiliation_string":"Fudan University,State Key Laboratory of Integrated Chips and Systems,Shanghai,China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101217592","display_name":"Xianfeng Cao","orcid":null},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xianfeng Cao","raw_affiliation_strings":["Fudan University,State Key Laboratory of Integrated Chips and Systems,Shanghai,China"],"affiliations":[{"raw_affiliation_string":"Fudan University,State Key Laboratory of Integrated Chips and Systems,Shanghai,China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060351696","display_name":"Kaixiang Zhu","orcid":"https://orcid.org/0009-0007-9096-3722"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kaixiang Zhu","raw_affiliation_strings":["Fudan University,State Key Laboratory of Integrated Chips and Systems,Shanghai,China"],"affiliations":[{"raw_affiliation_string":"Fudan University,State Key Laboratory of Integrated Chips and Systems,Shanghai,China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030750530","display_name":"Wenbo Yin","orcid":"https://orcid.org/0009-0004-1507-3242"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenbo Yin","raw_affiliation_strings":["Fudan University,State Key Laboratory of Integrated Chips and Systems,Shanghai,China"],"affiliations":[{"raw_affiliation_string":"Fudan University,State Key Laboratory of Integrated Chips and Systems,Shanghai,China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5002732486","display_name":"Lingli Wang","orcid":"https://orcid.org/0000-0002-0579-3527"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lingli Wang","raw_affiliation_strings":["Fudan University,State Key Laboratory of Integrated Chips and Systems,Shanghai,China"],"affiliations":[{"raw_affiliation_string":"Fudan University,State Key Laboratory of Integrated Chips and Systems,Shanghai,China","institution_ids":["https://openalex.org/I24943067"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5042297301"],"corresponding_institution_ids":["https://openalex.org/I24943067"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.38076049,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"9"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.5232999920845032,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.5232999920845032,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.36629998683929443,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10126","display_name":"Logic, programming, and type systems","score":0.06310000270605087,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/dataflow","display_name":"Dataflow","score":0.8691999912261963},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.6061999797821045},{"id":"https://openalex.org/keywords/vectorization","display_name":"Vectorization (mathematics)","score":0.5978000164031982},{"id":"https://openalex.org/keywords/control-flow","display_name":"Control flow","score":0.593500018119812},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.4977000057697296},{"id":"https://openalex.org/keywords/control-flow-graph","display_name":"Control flow graph","score":0.43299999833106995},{"id":"https://openalex.org/keywords/compile-time","display_name":"Compile time","score":0.4327000081539154},{"id":"https://openalex.org/keywords/runtime-system","display_name":"Runtime system","score":0.42160001397132874},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.4140999913215637},{"id":"https://openalex.org/keywords/data-parallelism","display_name":"Data parallelism","score":0.39329999685287476}],"concepts":[{"id":"https://openalex.org/C96324660","wikidata":"https://www.wikidata.org/wiki/Q205446","display_name":"Dataflow","level":2,"score":0.8691999912261963},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8640999794006348},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7519000172615051},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.6061999797821045},{"id":"https://openalex.org/C41681595","wikidata":"https://www.wikidata.org/wiki/Q7917855","display_name":"Vectorization (mathematics)","level":2,"score":0.5978000164031982},{"id":"https://openalex.org/C160191386","wikidata":"https://www.wikidata.org/wiki/Q868299","display_name":"Control flow","level":2,"score":0.593500018119812},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.4977000057697296},{"id":"https://openalex.org/C27458966","wikidata":"https://www.wikidata.org/wiki/Q1187693","display_name":"Control flow graph","level":2,"score":0.43299999833106995},{"id":"https://openalex.org/C200833197","wikidata":"https://www.wikidata.org/wiki/Q333707","display_name":"Compile time","level":3,"score":0.4327000081539154},{"id":"https://openalex.org/C2780870223","wikidata":"https://www.wikidata.org/wiki/Q1004415","display_name":"Runtime system","level":2,"score":0.42160001397132874},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.4140999913215637},{"id":"https://openalex.org/C61483411","wikidata":"https://www.wikidata.org/wiki/Q3124522","display_name":"Data parallelism","level":3,"score":0.39329999685287476},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.392300009727478},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.3837999999523163},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.3698999881744385},{"id":"https://openalex.org/C88468194","wikidata":"https://www.wikidata.org/wiki/Q1172416","display_name":"Data-flow analysis","level":3,"score":0.3605000078678131},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.35839998722076416},{"id":"https://openalex.org/C140763907","wikidata":"https://www.wikidata.org/wiki/Q2714055","display_name":"Instruction-level parallelism","level":3,"score":0.35670000314712524},{"id":"https://openalex.org/C2778100165","wikidata":"https://www.wikidata.org/wiki/Q1589327","display_name":"Memory hierarchy","level":3,"score":0.3547999858856201},{"id":"https://openalex.org/C176727019","wikidata":"https://www.wikidata.org/wiki/Q1172415","display_name":"Dataflow architecture","level":3,"score":0.3440999984741211},{"id":"https://openalex.org/C489000","wikidata":"https://www.wikidata.org/wiki/Q747385","display_name":"Data flow diagram","level":2,"score":0.33570000529289246},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.31839999556541443},{"id":"https://openalex.org/C76782552","wikidata":"https://www.wikidata.org/wiki/Q110546","display_name":"Just-in-time compilation","level":3,"score":0.30799999833106995},{"id":"https://openalex.org/C119701452","wikidata":"https://www.wikidata.org/wiki/Q5165881","display_name":"Control reconfiguration","level":2,"score":0.2944999933242798},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.2883000075817108},{"id":"https://openalex.org/C188854837","wikidata":"https://www.wikidata.org/wiki/Q268469","display_name":"Software pipelining","level":3,"score":0.2833000123500824},{"id":"https://openalex.org/C42992933","wikidata":"https://www.wikidata.org/wiki/Q691169","display_name":"Task parallelism","level":3,"score":0.28110000491142273},{"id":"https://openalex.org/C41112130","wikidata":"https://www.wikidata.org/wiki/Q2146175","display_name":"Retiming","level":2,"score":0.28029999136924744},{"id":"https://openalex.org/C133162039","wikidata":"https://www.wikidata.org/wiki/Q1061077","display_name":"Code generation","level":3,"score":0.2766999900341034},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.26739999651908875},{"id":"https://openalex.org/C164833996","wikidata":"https://www.wikidata.org/wiki/Q2323839","display_name":"Automatic parallelization","level":3,"score":0.2671000063419342},{"id":"https://openalex.org/C142962650","wikidata":"https://www.wikidata.org/wiki/Q240838","display_name":"Reconfigurable computing","level":3,"score":0.26660001277923584},{"id":"https://openalex.org/C119024030","wikidata":"https://www.wikidata.org/wiki/Q759899","display_name":"Call stack","level":3,"score":0.2646999955177307},{"id":"https://openalex.org/C91481028","wikidata":"https://www.wikidata.org/wiki/Q1054686","display_name":"Distributed memory","level":3,"score":0.26269999146461487},{"id":"https://openalex.org/C58013763","wikidata":"https://www.wikidata.org/wiki/Q5754574","display_name":"High-level synthesis","level":3,"score":0.2612000107765198},{"id":"https://openalex.org/C150552126","wikidata":"https://www.wikidata.org/wiki/Q339387","display_name":"SIMD","level":2,"score":0.2524000108242035}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iccad66269.2025.11240863","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccad66269.2025.11240863","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/ACM International Conference On Computer Aided Design (ICCAD)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320329860","display_name":"National Science and Technology Major Project","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W2026844540","https://openalex.org/W2050684782","https://openalex.org/W2798404616","https://openalex.org/W2979322703","https://openalex.org/W2994193159","https://openalex.org/W3008658471","https://openalex.org/W3108085245","https://openalex.org/W3122286897","https://openalex.org/W3136950882","https://openalex.org/W3195430069","https://openalex.org/W3205717712","https://openalex.org/W3211485653","https://openalex.org/W3212867926","https://openalex.org/W4245767887","https://openalex.org/W4246166885","https://openalex.org/W4281621335","https://openalex.org/W4319870650","https://openalex.org/W4387495975","https://openalex.org/W4389166674","https://openalex.org/W4402501070","https://openalex.org/W4407318935"],"related_works":[],"abstract_inverted_index":{"High-performance":[0],"computing":[1],"(HPC)":[2],"and":[3,15,34,53,69,84,124,166,169,184,200,224,237],"hardware":[4,72],"acceleration":[5],"increasingly":[6],"rely":[7],"on":[8],"dataflow":[9,58,152],"architectures":[10],"to":[11,36,126,217,229],"achieve":[12],"scalable":[13],"parallelism":[14,64,129,236],"efficiency.":[16],"High-level":[17],"synthesis":[18],"(HLS)":[19],"facilitates":[20],"accelerator":[21],"design":[22],"from":[23],"high-level":[24,174],"programs,":[25],"but":[26],"conventional":[27,131],"tools":[28],"often":[29],"require":[30],"intrusive":[31],"source-level":[32],"modifications":[33],"struggle":[35],"optimize":[37],"irregular":[38,91,138,225],"workloads.":[39],"Dynamically":[40],"scheduled":[41],"HLS":[42,219],"frameworks":[43],"offer":[44],"a":[45,116,155,204],"promising":[46],"direction":[47],"for":[48],"addressing":[49],"control":[50,92,162,167],"flow":[51,93,163],"divergence":[52],"memory":[54,82,170],"irregularity":[55],"by":[56,197],"generating":[57],"accelerators.":[59],"However,":[60],"they":[61],"lack":[62],"compile-time":[63],"optimizations":[65],"such":[66,97],"as":[67],"vectorization":[68,108,125,136,180],"incur":[70],"significant":[71,214],"overhead.":[73],"Moreover,":[74,227],"modern":[75],"compilers":[76],"can":[77],"generate":[78],"vectorized":[79],"code":[80],"using":[81],"access":[83],"computational":[85],"patterns.":[86],"Nevertheless,":[87],"in":[88,203],"programs":[89],"with":[90,178],"or":[94],"data-dependent":[95],"behavior,":[96],"patterns":[98],"are":[99],"unknown":[100],"until":[101],"runtime,":[102],"limiting":[103],"the":[104,135,191,195],"effectiveness":[105],"of":[106,137,194],"static":[107],"strategies.To":[109],"address":[110,134],"these":[111],"challenges,":[112],"we":[113],"propose":[114],"DynVec,":[115],"unified":[117],"vector-dataflow":[118,156],"framework":[119],"that":[120,145,159,210,232],"integrates":[121],"dynamic":[122,235],"scheduling":[123],"exploit":[127],"runtime":[128],"beyond":[130],"models.":[132],"We":[133],"kernels":[139],"through":[140,151],"an":[141],"MLIR-based":[142],"context-aware":[143],"vectorizer":[144],"effectively":[146],"identifies":[147],"vectorizable":[148],"operations":[149,202],"and,":[150],"scheduling,":[153],"generates":[154],"execution":[157,186],"graph":[158],"explicitly":[160],"models":[161],"constructs,":[164],"data":[165],"interfaces,":[168],"operations.":[171],"DynVec":[172,239],"encapsulates":[173],"elastic":[175],"units":[176],"designed":[177],"built-in":[179],"support,":[181],"allowing":[182],"customizable":[183],"adaptive":[185],"behavior.":[187],"Our":[188],"compiler":[189],"preserves":[190],"structural":[192],"hierarchy":[193],"kernel":[196],"combining":[198],"vector":[199],"scalar":[201],"bottom-up,":[205],"type-safe":[206],"manner.":[207],"Experiments":[208],"show":[209],"our":[211],"approach":[212],"achieves":[213],"speedup":[215],"compared":[216,228],"state-of-the-art":[218],"implementations":[220],"across":[221],"various":[222],"regular":[223],"applications.":[226],"hybrid":[230],"accelerators":[231],"separately":[233],"support":[234],"vectorization,":[238],"delivers":[240],"superior":[241],"performance.":[242]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-11-20T00:00:00"}
