{"id":"https://openalex.org/W4416285686","doi":"https://doi.org/10.48550/arxiv.2507.07683","title":"Accelerating Transposed Convolutions on FPGA-based Edge Devices","display_name":"Accelerating Transposed Convolutions on FPGA-based Edge Devices","publication_year":2025,"publication_date":"2025-07-10","ids":{"openalex":"https://openalex.org/W4416285686","doi":"https://doi.org/10.48550/arxiv.2507.07683"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2507.07683","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2507.07683","pdf_url":"https://arxiv.org/pdf/2507.07683","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2507.07683","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5037278231","display_name":"Jude Haris","orcid":"https://orcid.org/0000-0001-7359-3888"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Haris, Jude","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5008501980","display_name":"Jos\u00e9 Cano","orcid":"https://orcid.org/0000-0002-2243-389X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cano, Jos\u00e9","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5037278231"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.3192000091075897,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.3192000091075897,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.16899999976158142,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11697","display_name":"Numerical Methods and Algorithms","score":0.11860000342130661,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.7006000280380249},{"id":"https://openalex.org/keywords/enhanced-data-rates-for-gsm-evolution","display_name":"Enhanced Data Rates for GSM Evolution","score":0.5613999962806702},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.4997999966144562},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.46140000224113464},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.3813000023365021},{"id":"https://openalex.org/keywords/matrix-multiplication","display_name":"Matrix multiplication","score":0.3774000108242035},{"id":"https://openalex.org/keywords/edge-device","display_name":"Edge device","score":0.36629998683929443},{"id":"https://openalex.org/keywords/matrix","display_name":"Matrix (chemical analysis)","score":0.3617999851703644}],"concepts":[{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.7006000280380249},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6432999968528748},{"id":"https://openalex.org/C162307627","wikidata":"https://www.wikidata.org/wiki/Q204833","display_name":"Enhanced Data Rates for GSM Evolution","level":2,"score":0.5613999962806702},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.4997999966144562},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.46140000224113464},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4366999864578247},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3926999866962433},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.3813000023365021},{"id":"https://openalex.org/C17349429","wikidata":"https://www.wikidata.org/wiki/Q1049914","display_name":"Matrix multiplication","level":3,"score":0.3774000108242035},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3714999854564667},{"id":"https://openalex.org/C138236772","wikidata":"https://www.wikidata.org/wiki/Q25098575","display_name":"Edge device","level":3,"score":0.36629998683929443},{"id":"https://openalex.org/C106487976","wikidata":"https://www.wikidata.org/wiki/Q685816","display_name":"Matrix (chemical analysis)","level":2,"score":0.3617999851703644},{"id":"https://openalex.org/C2780595030","wikidata":"https://www.wikidata.org/wiki/Q3860309","display_name":"Multiplication (music)","level":2,"score":0.34549999237060547},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.3449000120162964},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.3138999938964844},{"id":"https://openalex.org/C19768560","wikidata":"https://www.wikidata.org/wiki/Q320727","display_name":"Dependency (UML)","level":2,"score":0.2784999907016754},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.2694999873638153},{"id":"https://openalex.org/C2779570065","wikidata":"https://www.wikidata.org/wiki/Q1754137","display_name":"Leading edge","level":2,"score":0.26829999685287476},{"id":"https://openalex.org/C49154492","wikidata":"https://www.wikidata.org/wiki/Q5300","display_name":"Central processing unit","level":2,"score":0.26660001277923584},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.258899986743927},{"id":"https://openalex.org/C160633673","wikidata":"https://www.wikidata.org/wiki/Q355198","display_name":"Pixel","level":2,"score":0.257999986410141},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.25440001487731934},{"id":"https://openalex.org/C89611455","wikidata":"https://www.wikidata.org/wiki/Q6804646","display_name":"Mechanism (biology)","level":2,"score":0.25189998745918274}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2507.07683","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2507.07683","pdf_url":"https://arxiv.org/pdf/2507.07683","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2507.07683","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2507.07683","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2507.07683","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2507.07683","pdf_url":"https://arxiv.org/pdf/2507.07683","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Transposed":[0],"Convolutions":[1],"(TCONV)":[2],"enable":[3],"the":[4,14,36,79,112,151,167],"up-scaling":[5],"mechanism":[6],"within":[7],"generative":[8,42,124],"Artificial":[9],"Intelligence":[10],"(AI)":[11],"models.":[12],"However,":[13],"predominant":[15],"Input-Oriented":[16],"Mapping":[17],"(IOM)":[18],"method":[19],"for":[20],"implementing":[21],"TCONV":[22,40,71,92,120,137],"has":[23],"complex":[24],"output":[25],"mapping,":[26],"overlapping":[27],"sums,":[28],"and":[29,41,86,131,153,162],"ineffectual":[30],"computations.":[31],"These":[32],"inefficiencies":[33],"further":[34],"exacerbate":[35],"performance":[37,89,113],"bottleneck":[38],"of":[39,99,114,119],"models":[43,125],"on":[44,73,116,150],"resource-constrained":[45,74,136],"edge":[46,75],"devices.":[47],"To":[48],"address":[49],"this":[50,53],"problem,":[51],"in":[52],"paper":[54],"we":[55,83,147],"propose":[56],"MM2IM,":[57],"a":[58,102,117],"hardware-software":[59],"co-designed":[60],"accelerator":[61],"that":[62],"combines":[63],"Matrix":[64],"Multiplication":[65],"(MatMul)":[66],"with":[67],"col2IM":[68],"to":[69,128,159],"process":[70],"layers":[72,121],"devices":[76],"efficiently.":[77],"Using":[78],"SECDA-TFLite":[80],"design":[81],"toolkit,":[82],"implement":[84],"MM2IM":[85,115,149],"evaluate":[87,111,148],"its":[88],"across":[90],"261":[91],"problem":[93],"configurations,":[94],"achieving":[95,126,157],"an":[96],"average":[97],"speedup":[98,161],"1.9x":[100],"against":[101,134,166],"dual-thread":[103],"ARM":[104],"Neon":[105],"optimized":[106],"CPU":[107,168],"baseline.":[108,169],"We":[109],"then":[110],"range":[118],"from":[122],"well-known":[123],"up":[127,158],"4.2x":[129],"speedup,":[130],"compare":[132],"it":[133],"similar":[135],"accelerators,":[138],"outperforming":[139],"them":[140],"by":[141],"at":[142],"least":[143],"2x":[144],"GOPs/DSP.":[145],"Finally,":[146],"DCGAN":[152],"pix2pix":[154],"GAN":[155],"models,":[156],"3x":[160],"2.4x":[163],"energy":[164],"reduction":[165]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-10T00:00:00"}
