{"id":"https://openalex.org/W4410583693","doi":"https://doi.org/10.23919/date64628.2025.10992860","title":"Axon: A Novel Systolic Array Architecture for Improved Run Time and Energy Efficient GeMM and Conv Operation with On-Chip im2col","display_name":"Axon: A Novel Systolic Array Architecture for Improved Run Time and Energy Efficient GeMM and Conv Operation with On-Chip im2col","publication_year":2025,"publication_date":"2025-03-31","ids":{"openalex":"https://openalex.org/W4410583693","doi":"https://doi.org/10.23919/date64628.2025.10992860"},"language":"en","primary_location":{"id":"doi:10.23919/date64628.2025.10992860","is_oa":false,"landing_page_url":"https://doi.org/10.23919/date64628.2025.10992860","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 Design, Automation &amp;amp; Test in Europe Conference (DATE)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5037120477","display_name":"Md. Mizanur Rahaman Nayan","orcid":null},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Md Mizanur Rahaman Nayan","raw_affiliation_strings":["Georgia Institute of Technology,Department of Electrical and Computer Engineering,USA"],"affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology,Department of Electrical and Computer Engineering,USA","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078314931","display_name":"Ritik Raj","orcid":"https://orcid.org/0009-0005-1853-6107"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ritik Raj","raw_affiliation_strings":["Georgia Institute of Technology,Department of Electrical and Computer Engineering,USA"],"affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology,Department of Electrical and Computer Engineering,USA","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115861658","display_name":"Gouse Basha Shaik","orcid":null},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Gouse Basha Shaik","raw_affiliation_strings":["Georgia Institute of Technology,Department of Electrical and Computer Engineering,USA"],"affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology,Department of Electrical and Computer Engineering,USA","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034089074","display_name":"Tushar Krishna","orcid":"https://orcid.org/0000-0001-5738-6942"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tushar Krishna","raw_affiliation_strings":["Georgia Institute of Technology,Department of Electrical and Computer Engineering,USA"],"affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology,Department of Electrical and Computer Engineering,USA","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5080526846","display_name":"Azad Naeemi","orcid":"https://orcid.org/0000-0003-4774-9046"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Azad J Naeemi","raw_affiliation_strings":["Georgia Institute of Technology,Department of Electrical and Computer Engineering,USA"],"affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology,Department of Electrical and Computer Engineering,USA","institution_ids":["https://openalex.org/I130701444"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5037120477"],"corresponding_institution_ids":["https://openalex.org/I130701444"],"apc_list":null,"apc_paid":null,"fwci":3.1134,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.91693792,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10363","display_name":"Low-power high-performance VLSI design","score":0.9955999851226807,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/systolic-array","display_name":"Systolic array","score":0.7021390795707703},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6672224402427673},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6432017087936401},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.4543260335922241},{"id":"https://openalex.org/keywords/energy","display_name":"Energy (signal processing)","score":0.4431796073913574},{"id":"https://openalex.org/keywords/chip","display_name":"Chip","score":0.44126665592193604},{"id":"https://openalex.org/keywords/system-on-a-chip","display_name":"System on a chip","score":0.41236862540245056},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.3947736620903015},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.38295215368270874},{"id":"https://openalex.org/keywords/very-large-scale-integration","display_name":"Very-large-scale integration","score":0.26187270879745483},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.11622220277786255},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.09167948365211487}],"concepts":[{"id":"https://openalex.org/C150741067","wikidata":"https://www.wikidata.org/wiki/Q2377218","display_name":"Systolic array","level":3,"score":0.7021390795707703},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6672224402427673},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6432017087936401},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.4543260335922241},{"id":"https://openalex.org/C186370098","wikidata":"https://www.wikidata.org/wiki/Q442787","display_name":"Energy (signal processing)","level":2,"score":0.4431796073913574},{"id":"https://openalex.org/C165005293","wikidata":"https://www.wikidata.org/wiki/Q1074500","display_name":"Chip","level":2,"score":0.44126665592193604},{"id":"https://openalex.org/C118021083","wikidata":"https://www.wikidata.org/wiki/Q610398","display_name":"System on a chip","level":2,"score":0.41236862540245056},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.3947736620903015},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.38295215368270874},{"id":"https://openalex.org/C14580979","wikidata":"https://www.wikidata.org/wiki/Q876049","display_name":"Very-large-scale integration","level":2,"score":0.26187270879745483},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.11622220277786255},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.09167948365211487},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.23919/date64628.2025.10992860","is_oa":false,"landing_page_url":"https://doi.org/10.23919/date64628.2025.10992860","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 Design, Automation &amp;amp; Test in Europe Conference (DATE)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.8700000047683716,"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":37,"referenced_works":["https://openalex.org/W2346205343","https://openalex.org/W2442974303","https://openalex.org/W2606722458","https://openalex.org/W2612076670","https://openalex.org/W2899644485","https://openalex.org/W2942898010","https://openalex.org/W2945146780","https://openalex.org/W2962780877","https://openalex.org/W3012178976","https://openalex.org/W3012561096","https://openalex.org/W3031172388","https://openalex.org/W3036646298","https://openalex.org/W3040024858","https://openalex.org/W3043406639","https://openalex.org/W3047390932","https://openalex.org/W3091512799","https://openalex.org/W3092319711","https://openalex.org/W3097528158","https://openalex.org/W3113606433","https://openalex.org/W3130554079","https://openalex.org/W3184376546","https://openalex.org/W3186743607","https://openalex.org/W3190062760","https://openalex.org/W3200280098","https://openalex.org/W3213528054","https://openalex.org/W3217357178","https://openalex.org/W4244024631","https://openalex.org/W4280635517","https://openalex.org/W4293024984","https://openalex.org/W4312929963","https://openalex.org/W4381050415","https://openalex.org/W4382203333","https://openalex.org/W4385245566","https://openalex.org/W4386736489","https://openalex.org/W4386763770","https://openalex.org/W6778883912","https://openalex.org/W6798882123"],"related_works":["https://openalex.org/W2065289416","https://openalex.org/W2017236304","https://openalex.org/W3142211975","https://openalex.org/W2115579119","https://openalex.org/W2136854845","https://openalex.org/W1879443270","https://openalex.org/W2018912978","https://openalex.org/W2130914040","https://openalex.org/W2119122672","https://openalex.org/W4292904049"],"abstract_inverted_index":{"General":[0],"matrix":[1],"multiplication":[2],"(<tex":[3],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[4,26],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$GeMM$</tex>)":[5],"is":[6],"a":[7,40,59,106],"core":[8],"operation":[9],"in":[10,43,65,126,159,196],"virtually":[11],"all":[12],"AI":[13],"applications.":[14],"Systolic":[15],"array":[16],"(SA)":[17],"based":[18,178],"architectures":[19],"have":[20,167],"shown":[21],"great":[22],"promise":[23],"as":[24,103],"<tex":[25],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$GeMM$</tex>":[27],"hardware":[28,91,108,152,160],"accelerators":[29],"thanks":[30],"to":[31,48,86,110,156],"their":[32],"speed":[33],"and":[34,118,130,138,153,169,172,182,189,199],"energy":[35,134],"efficiency.":[36],"Unfortunately,":[37],"SAs":[38,66],"incur":[39],"linear":[41],"delay":[42],"filling":[44],"the":[45,73,82,95,121,163,180],"operands,":[46],"due":[47],"unidirectional":[49],"propogation":[50],"via":[51],"pipeline":[52],"latches.":[53],"In":[54,93,143],"this":[55],"work,":[56],"we":[57,68],"propose":[58],"novel":[60,181],"in-array":[61],"data":[62,70,97,146,164],"orchestration":[63,98,147],"technique":[64],"where":[67],"enable":[69],"feeding":[71],"on":[72,141,179],"principal":[74],"diagonal":[75],"followed":[76],"by":[77,84],"bi-directional":[78],"propagation.":[79],"This":[80],"improves":[81],"runtime":[83],"up":[85],"2":[87],"\u00d7":[88,132],"at":[89],"minimal":[90],"overhead.":[92],"addition,":[94],"proposed":[96,193],"enables":[99],"convolution":[100],"lowering":[101],"(known":[102],"im2col)":[104],"using":[105,185],"simple":[107],"support":[109],"fully":[111],"exploit":[112],"input":[113],"feature":[114],"map":[115],"reuse":[116],"opportunity":[117],"significantly":[119],"lower":[120],"off-chip":[122],"memory":[123],"traffic":[124],"resulting":[125],"1.2":[127],"\u00d7throughput":[128],"improvement":[129],"2.17":[131],"inference":[133],"reduction":[135],"during":[136],"YOLOv3":[137],"RESNET50":[139],"workload":[140],"average.":[142],"contrast,":[144],"conventional":[145,183],"would":[148],"require":[149],"more":[150],"elaborate":[151],"control":[154],"signals":[155],"implement":[157],"im2col":[158],"because":[161],"of":[162],"skew.":[165],"We":[166],"synthesized":[168],"conducted":[170],"place":[171],"route":[173],"for":[174],"16\u00d716":[175],"systolic":[176],"arrays":[177],"orchestrations":[184],"ASAP":[186],"7nm":[187],"PDK":[188],"found":[190],"that":[191],"our":[192],"approach":[194],"results":[195],"0.211%":[197],"area":[198],"1.6%":[200],"power":[201],"overheads.":[202]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
