{"id":"https://openalex.org/W4415250785","doi":"https://doi.org/10.1109/hpec67600.2025.11196474","title":"Evaluating AMD-Xilinx Frameworks for Deep-Learning Acceleration on Versal","display_name":"Evaluating AMD-Xilinx Frameworks for Deep-Learning Acceleration on Versal","publication_year":2025,"publication_date":"2025-09-15","ids":{"openalex":"https://openalex.org/W4415250785","doi":"https://doi.org/10.1109/hpec67600.2025.11196474"},"language":"en","primary_location":{"id":"doi:10.1109/hpec67600.2025.11196474","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec67600.2025.11196474","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5119840633","display_name":"Peter Drum","orcid":null},"institutions":[{"id":"https://openalex.org/I4210143490","display_name":"Computing Center","ror":"https://ror.org/0557kgc34","country_code":"RU","type":"facility","lineage":["https://openalex.org/I1313323035","https://openalex.org/I4210143490","https://openalex.org/I4210148470"]}],"countries":["RU"],"is_corresponding":true,"raw_author_name":"Peter Drum","raw_affiliation_strings":["University of Pittsburgh,SHREC: NSF Center for Space, High Performance, and Resilient Computing NSF SHREC Center,Pittsburgh,Pennsylvania,15213"],"affiliations":[{"raw_affiliation_string":"University of Pittsburgh,SHREC: NSF Center for Space, High Performance, and Resilient Computing NSF SHREC Center,Pittsburgh,Pennsylvania,15213","institution_ids":["https://openalex.org/I4210143490"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5082898376","display_name":"Alan D. George","orcid":"https://orcid.org/0000-0001-9665-2879"},"institutions":[{"id":"https://openalex.org/I4210143490","display_name":"Computing Center","ror":"https://ror.org/0557kgc34","country_code":"RU","type":"facility","lineage":["https://openalex.org/I1313323035","https://openalex.org/I4210143490","https://openalex.org/I4210148470"]}],"countries":["RU"],"is_corresponding":false,"raw_author_name":"Alan D. George","raw_affiliation_strings":["University of Pittsburgh,SHREC: NSF Center for Space, High Performance, and Resilient Computing NSF SHREC Center,Pittsburgh,Pennsylvania,15213"],"affiliations":[{"raw_affiliation_string":"University of Pittsburgh,SHREC: NSF Center for Space, High Performance, and Resilient Computing NSF SHREC Center,Pittsburgh,Pennsylvania,15213","institution_ids":["https://openalex.org/I4210143490"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5119840633"],"corresponding_institution_ids":["https://openalex.org/I4210143490"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.31483778,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.8266000151634216,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.8266000151634216,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11992","display_name":"CCD and CMOS Imaging Sensors","score":0.7824000120162964,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.6924999952316284,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.7534999847412109},{"id":"https://openalex.org/keywords/acceleration","display_name":"Acceleration","score":0.5788999795913696},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.5192000269889832},{"id":"https://openalex.org/keywords/preprocessor","display_name":"Preprocessor","score":0.48919999599456787},{"id":"https://openalex.org/keywords/hardware-acceleration","display_name":"Hardware acceleration","score":0.48910000920295715},{"id":"https://openalex.org/keywords/power","display_name":"Power (physics)","score":0.4690000116825104},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.3984000086784363}],"concepts":[{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.7534999847412109},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7006000280380249},{"id":"https://openalex.org/C117896860","wikidata":"https://www.wikidata.org/wiki/Q11376","display_name":"Acceleration","level":2,"score":0.5788999795913696},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.5192000269889832},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.48919999599456787},{"id":"https://openalex.org/C13164978","wikidata":"https://www.wikidata.org/wiki/Q600158","display_name":"Hardware acceleration","level":3,"score":0.48910000920295715},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.4690000116825104},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.4189000129699707},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.40700000524520874},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.3984000086784363},{"id":"https://openalex.org/C65232700","wikidata":"https://www.wikidata.org/wiki/Q5656403","display_name":"Hardware architecture","level":3,"score":0.36410000920295715},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3447999954223633},{"id":"https://openalex.org/C142962650","wikidata":"https://www.wikidata.org/wiki/Q240838","display_name":"Reconfigurable computing","level":3,"score":0.29829999804496765},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.29670000076293945},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.29170000553131104},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.2816999852657318},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.27900001406669617},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.2574000060558319},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.25540000200271606},{"id":"https://openalex.org/C2985918086","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel architecture","level":3,"score":0.2515000104904175}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpec67600.2025.11196474","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec67600.2025.11196474","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"AMD-Xilinx":[0],"Versal":[1,54,107,131,168],"Adaptive":[2],"System-on-Chip":[3],"devices":[4],"are":[5],"powerful":[6],"hardware":[7,108],"accelerators":[8,86,126,165],"with":[9,96],"a":[10,22,39,117,189,201,206,211,226,270],"variety":[11],"of":[12,47,148,178,203,214,228],"tools":[13],"for":[14,63,87,188,262],"implementing":[15],"high-performance,":[16],"low-power,":[17],"deep-learning":[18,29,173],"models.":[19],"Vitis":[20,79,110,160,274],"AI,":[21],"machine-learning":[23,73],"framework":[24,74],"created":[25,75],"by":[26,76,81],"AMD-Xilinx,":[27,77],"accelerates":[28],"algorithms":[30],"using":[31],"the":[32,48,57,93,138,167,195,218,232],"Deep-Learning":[33],"Processor":[34],"Unit":[35],"(DPU),":[36],"which":[37],"includes":[38],"Versal-specific":[40],"architecture":[41,208,236],"designed":[42,193],"to":[43,67,92,123,127,137,142,157,231,273],"take":[44],"full":[45],"advantage":[46],"unique":[49],"processing":[50],"elements":[51],"on":[52,106,130,166,277],"select":[53],"platforms.":[55],"However,":[56],"DPU":[58,162,207,223,276],"does":[59],"not":[60,102],"run":[61,128],"efficiently":[62,129],"small":[64,190,263],"models":[65,175,264,280],"due":[66],"its":[68],"general-purpose":[69],"nature.":[70],"FINN,":[71],"another":[72],"complements":[78],"AI":[80,169],"providing":[82],"custom":[83,119],"low-latency,":[84],"high-throughput":[85],"highly":[88],"quantized":[89],"networks.":[90],"Due":[91],"Versal\u2019s":[94],"incompatibility":[95],"FINN\u2019s":[97],"software":[98],"drivers,":[99],"FINN":[100,125,140,197,199,235,254],"has":[101],"yet":[103],"been":[104],"tested":[105,224],"against":[109,163],"AI.":[111],"In":[112],"this":[113,152],"research,":[114,198],"we":[115,251],"create":[116],"novel":[118],"wrapper":[120,156],"and":[121,155,182,259,265],"driver":[122],"allow":[124],"devices.":[132],"We":[133,150,185],"also":[134],"add":[135],"preprocessing":[136],"generated":[139],"accelerator":[141,154],"avoid":[143],"bottlenecks":[144],"at":[145],"high":[146],"levels":[147],"parallelization.":[149],"use":[151],"adjusted":[153],"directly":[158],"compare":[159],"AI\u2019s":[161,275],"FINN-created":[164],"Core":[170],"across":[171],"two":[172],"classification":[174],"in":[176,194],"terms":[177],"throughput,":[179],"power":[180,242,260],"efficiency,":[181],"device":[183],"utilization.":[184],"found":[186],"that":[187,209,253],"convolutional":[191],"model":[192],"original":[196],"achieves":[200,225],"speedup":[202,227],"10.1\u00d7":[204],"over":[205],"consumes":[210,239],"similar":[212],"amount":[213],"power.":[215],"Running":[216],"MobileNetV1,":[217],"most":[219,233],"performant":[220,234],"(highest":[221],"framerate)":[222],"3.0\u00d7":[229],"compared":[230],"tested,":[237],"but":[238],"significantly":[240],"more":[241],"(40.8":[243],"W":[244],"vs":[245],"17.8":[246],"W).":[247],"From":[248],"our":[249],"results,":[250],"conclude":[252],"can":[255,266],"offer":[256],"better":[257],"performance":[258],"efficiency":[261],"be":[267],"used":[268],"as":[269,282],"low-power":[271],"alternative":[272],"larger":[278],"edge-computing":[279],"such":[281],"MobileNetV1.":[283]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-16T00:00:00"}
