{"id":"https://openalex.org/W4414197095","doi":"https://doi.org/10.1109/dac63849.2025.11132878","title":"HeteroSVD: Efficient SVD Accelerator on Versal ACAP with Algorithm-Hardware Co-Design","display_name":"HeteroSVD: Efficient SVD Accelerator on Versal ACAP with Algorithm-Hardware Co-Design","publication_year":2025,"publication_date":"2025-06-22","ids":{"openalex":"https://openalex.org/W4414197095","doi":"https://doi.org/10.1109/dac63849.2025.11132878"},"language":"en","primary_location":{"id":"doi:10.1109/dac63849.2025.11132878","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dac63849.2025.11132878","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 62nd ACM/IEEE Design Automation Conference (DAC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5054929311","display_name":"Xinya Luan","orcid":null},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xinya Luan","raw_affiliation_strings":["Beijing University of Posts and Telecommunications"],"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040011986","display_name":"Zhe Lin","orcid":"https://orcid.org/0000-0002-7422-7799"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhe Lin","raw_affiliation_strings":["Sun Yat-sen University"],"affiliations":[{"raw_affiliation_string":"Sun Yat-sen University","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101062141","display_name":"Kai Shi","orcid":"https://orcid.org/0009-0002-4059-9663"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kai Shi","raw_affiliation_strings":["Beijing University of Posts and Telecommunications"],"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016813457","display_name":"Jianwang Zhai","orcid":"https://orcid.org/0000-0002-1581-3536"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianwang Zhai","raw_affiliation_strings":["Beijing University of Posts and Telecommunications"],"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5035246366","display_name":"Kang Zhao","orcid":"https://orcid.org/0000-0002-8321-2804"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kang Zhao","raw_affiliation_strings":["Beijing University of Posts and Telecommunications"],"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications","institution_ids":["https://openalex.org/I139759216"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5054929311"],"corresponding_institution_ids":["https://openalex.org/I139759216"],"apc_list":null,"apc_paid":null,"fwci":1.1856,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.81735773,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11993","display_name":"Atomic and Subatomic Physics Research","score":0.9866999983787537,"subfield":{"id":"https://openalex.org/subfields/3107","display_name":"Atomic and Molecular Physics, and Optics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11993","display_name":"Atomic and Subatomic Physics Research","score":0.9866999983787537,"subfield":{"id":"https://openalex.org/subfields/3107","display_name":"Atomic and Molecular Physics, and Optics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10378","display_name":"Advanced MRI Techniques and Applications","score":0.9736999869346619,"subfield":{"id":"https://openalex.org/subfields/2741","display_name":"Radiology, Nuclear Medicine and Imaging"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T10522","display_name":"Medical Imaging Techniques and Applications","score":0.9649999737739563,"subfield":{"id":"https://openalex.org/subfields/2741","display_name":"Radiology, Nuclear Medicine and Imaging"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/singular-value-decomposition","display_name":"Singular value decomposition","score":0.7843999862670898},{"id":"https://openalex.org/keywords/dataflow","display_name":"Dataflow","score":0.713100016117096},{"id":"https://openalex.org/keywords/matrix-decomposition","display_name":"Matrix decomposition","score":0.6507999897003174},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.6128000020980835},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.5044000148773193},{"id":"https://openalex.org/keywords/decomposition","display_name":"Decomposition","score":0.4731999933719635},{"id":"https://openalex.org/keywords/factorization","display_name":"Factorization","score":0.42170000076293945},{"id":"https://openalex.org/keywords/signal-processing","display_name":"Signal processing","score":0.36230000853538513}],"concepts":[{"id":"https://openalex.org/C22789450","wikidata":"https://www.wikidata.org/wiki/Q420904","display_name":"Singular value decomposition","level":2,"score":0.7843999862670898},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7663000226020813},{"id":"https://openalex.org/C96324660","wikidata":"https://www.wikidata.org/wiki/Q205446","display_name":"Dataflow","level":2,"score":0.713100016117096},{"id":"https://openalex.org/C42355184","wikidata":"https://www.wikidata.org/wiki/Q1361088","display_name":"Matrix decomposition","level":3,"score":0.6507999897003174},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.6128000020980835},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.5044000148773193},{"id":"https://openalex.org/C124681953","wikidata":"https://www.wikidata.org/wiki/Q339062","display_name":"Decomposition","level":2,"score":0.4731999933719635},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.438400000333786},{"id":"https://openalex.org/C187834632","wikidata":"https://www.wikidata.org/wiki/Q188804","display_name":"Factorization","level":2,"score":0.42170000076293945},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.38359999656677246},{"id":"https://openalex.org/C104267543","wikidata":"https://www.wikidata.org/wiki/Q208163","display_name":"Signal processing","level":3,"score":0.36230000853538513},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.3343000113964081},{"id":"https://openalex.org/C106487976","wikidata":"https://www.wikidata.org/wiki/Q685816","display_name":"Matrix (chemical analysis)","level":2,"score":0.32670000195503235},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.320499986410141},{"id":"https://openalex.org/C2776221188","wikidata":"https://www.wikidata.org/wiki/Q21072556","display_name":"Design space exploration","level":2,"score":0.30979999899864197},{"id":"https://openalex.org/C17349429","wikidata":"https://www.wikidata.org/wiki/Q1049914","display_name":"Matrix multiplication","level":3,"score":0.3073999881744385},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.301800012588501},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.29739999771118164},{"id":"https://openalex.org/C186370098","wikidata":"https://www.wikidata.org/wiki/Q442787","display_name":"Energy (signal processing)","level":2,"score":0.2904999852180481},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.28850001096725464},{"id":"https://openalex.org/C179799912","wikidata":"https://www.wikidata.org/wiki/Q205084","display_name":"Computational complexity theory","level":2,"score":0.2759000062942505},{"id":"https://openalex.org/C13164978","wikidata":"https://www.wikidata.org/wiki/Q600158","display_name":"Hardware acceleration","level":3,"score":0.26829999685287476}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/dac63849.2025.11132878","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dac63849.2025.11132878","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 62nd ACM/IEEE Design Automation Conference (DAC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":15,"referenced_works":["https://openalex.org/W1997259299","https://openalex.org/W2017192372","https://openalex.org/W2093637157","https://openalex.org/W2105812312","https://openalex.org/W2114043910","https://openalex.org/W2150226074","https://openalex.org/W2169512887","https://openalex.org/W2289766496","https://openalex.org/W2736056409","https://openalex.org/W2768160734","https://openalex.org/W3041402615","https://openalex.org/W4226021321","https://openalex.org/W4292826159","https://openalex.org/W4308090464","https://openalex.org/W4321636561"],"related_works":[],"abstract_inverted_index":{"Singular":[0],"value":[1],"decomposition":[2,84],"(SVD)":[3],"is":[4,26],"a":[5,67,81],"matrix":[6],"factorization":[7],"technique":[8],"widely":[9],"used":[10],"in":[11,41,107,163,167,172],"signal":[12],"processing":[13],"and":[14,46,56,86,97,102,116,131,151,169],"recommendation":[15],"systems,":[16],"etc.":[17],"In":[18],"general,":[19],"the":[20,29,75,111,142],"time":[21],"complexity":[22],"of":[23,51,113,158],"SVD":[24,33,72,83,94],"algorithms":[25,34],"cubic":[27],"to":[28,36,92,109,160],"problem":[30],"size,":[31],"making":[32],"difficult":[35],"meet":[37],"stringent":[38],"performance":[39,129],"requirements":[40],"real-time.":[42],"However,":[43],"existing":[44,148],"FPGA":[45,149],"GPU":[47,153],"solutions":[48,154],"fall":[49],"short":[50],"jointly":[52,96],"optimizing":[53],"latency,":[54,164],"throughput,":[55,168],"power":[57],"consumption.":[58],"To":[59],"settle":[60],"this":[61,63],"issue,":[62],"paper":[64],"proposes":[65,87],"HeteroSVD,":[66],"heterogeneous":[68],"reconfigurable":[69],"accelerator":[70],"for":[71],"computation":[73],"on":[74],"Versal":[76],"ACAP":[77],"platform.":[78],"HeteroSVD":[79,140],"introduces":[80],"system-level":[82],"mechanism":[85],"an":[88,122,156],"algorithm-hardware":[89],"co-design":[90],"method":[91],"optimize":[93],"ordering":[95],"AI":[98],"engine":[99],"(AIE)-centric":[100],"dataflow":[101],"placement":[103],"with":[104,155],"Versal.":[105],"Furthermore,":[106],"order":[108],"improve":[110],"quality":[112],"results":[114,137],"(QoR)":[115],"facilitate":[117],"micro-architecture":[118],"selection,":[119],"we":[120],"introduce":[121],"automatic":[123],"optimization":[124],"framework":[125],"that":[126,139],"performs":[127],"accurate":[128],"modeling":[130],"fast":[132],"design":[133],"space":[134],"exploration.":[135],"Experiment":[136],"demonstrate":[138],"reduces":[141],"latency":[143],"by":[144],"$1.98":[145],"\\times$":[146,162,166,171],"over":[147],"accelerators":[150],"outperforms":[152],"improvement":[157],"up":[159],"$7.22":[161],"$1.77":[165],"$13.18":[170],"energy":[173],"efficiency.":[174]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
