{"id":"https://openalex.org/W4391125526","doi":"https://doi.org/10.1145/3626202.3637569","title":"SSR: Spatial Sequential Hybrid Architecture for Latency Throughput Tradeoff in Transformer Acceleration","display_name":"SSR: Spatial Sequential Hybrid Architecture for Latency Throughput Tradeoff in Transformer Acceleration","publication_year":2024,"publication_date":"2024-04-01","ids":{"openalex":"https://openalex.org/W4391125526","doi":"https://doi.org/10.1145/3626202.3637569"},"language":"en","primary_location":{"id":"doi:10.1145/3626202.3637569","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3626202.3637569","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3626202.3637569","source":null,"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 ACM/SIGDA International Symposium on Field Programmable Gate Arrays","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3626202.3637569","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5074853634","display_name":"Jinming Zhuang","orcid":"https://orcid.org/0000-0003-3659-339X"},"institutions":[{"id":"https://openalex.org/I170201317","display_name":"University of Pittsburgh","ror":"https://ror.org/01an3r305","country_code":"US","type":"education","lineage":["https://openalex.org/I170201317"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Jinming Zhuang","raw_affiliation_strings":["University of Pittsburgh, Pittsburgh, USA"],"affiliations":[{"raw_affiliation_string":"University of Pittsburgh, Pittsburgh, USA","institution_ids":["https://openalex.org/I170201317"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088193007","display_name":"Zhuoping Yang","orcid":"https://orcid.org/0000-0002-7655-4080"},"institutions":[{"id":"https://openalex.org/I170201317","display_name":"University of Pittsburgh","ror":"https://ror.org/01an3r305","country_code":"US","type":"education","lineage":["https://openalex.org/I170201317"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhuoping Yang","raw_affiliation_strings":["University of Pittsburgh, Pittsburgh, USA"],"affiliations":[{"raw_affiliation_string":"University of Pittsburgh, Pittsburgh, USA","institution_ids":["https://openalex.org/I170201317"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103205936","display_name":"Shixin Ji","orcid":"https://orcid.org/0009-0003-3429-4692"},"institutions":[{"id":"https://openalex.org/I170201317","display_name":"University of Pittsburgh","ror":"https://ror.org/01an3r305","country_code":"US","type":"education","lineage":["https://openalex.org/I170201317"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shixin Ji","raw_affiliation_strings":["University of Pittsburgh, Pittsburgh, USA"],"affiliations":[{"raw_affiliation_string":"University of Pittsburgh, Pittsburgh, USA","institution_ids":["https://openalex.org/I170201317"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060016795","display_name":"Heng Huang","orcid":"https://orcid.org/0000-0002-3483-8333"},"institutions":[{"id":"https://openalex.org/I66946132","display_name":"University of Maryland, College Park","ror":"https://ror.org/047s2c258","country_code":"US","type":"education","lineage":["https://openalex.org/I66946132"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Heng Huang","raw_affiliation_strings":["University of Maryland, College Park, USA"],"affiliations":[{"raw_affiliation_string":"University of Maryland, College Park, USA","institution_ids":["https://openalex.org/I66946132"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030875484","display_name":"Alex K. Jones","orcid":"https://orcid.org/0000-0001-7498-0206"},"institutions":[{"id":"https://openalex.org/I170201317","display_name":"University of Pittsburgh","ror":"https://ror.org/01an3r305","country_code":"US","type":"education","lineage":["https://openalex.org/I170201317"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Alex K. Jones","raw_affiliation_strings":["University of Pittsburgh, Pittsburgh, USA"],"affiliations":[{"raw_affiliation_string":"University of Pittsburgh, Pittsburgh, USA","institution_ids":["https://openalex.org/I170201317"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066534595","display_name":"Jingtong Hu","orcid":"https://orcid.org/0000-0003-4029-4034"},"institutions":[{"id":"https://openalex.org/I170201317","display_name":"University of Pittsburgh","ror":"https://ror.org/01an3r305","country_code":"US","type":"education","lineage":["https://openalex.org/I170201317"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jingtong Hu","raw_affiliation_strings":["University of Pittsburgh, Pittsburgh, USA"],"affiliations":[{"raw_affiliation_string":"University of Pittsburgh, Pittsburgh, USA","institution_ids":["https://openalex.org/I170201317"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000141831","display_name":"Yiyu Shi","orcid":"https://orcid.org/0000-0002-6788-9823"},"institutions":[{"id":"https://openalex.org/I107639228","display_name":"University of Notre Dame","ror":"https://ror.org/00mkhxb43","country_code":"US","type":"education","lineage":["https://openalex.org/I107639228"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yiyu Shi","raw_affiliation_strings":["University of Notre Dame, Notre Dame, USA"],"affiliations":[{"raw_affiliation_string":"University of Notre Dame, Notre Dame, USA","institution_ids":["https://openalex.org/I107639228"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5063866156","display_name":"Peipei Zhou","orcid":"https://orcid.org/0000-0002-0493-1844"},"institutions":[{"id":"https://openalex.org/I170201317","display_name":"University of Pittsburgh","ror":"https://ror.org/01an3r305","country_code":"US","type":"education","lineage":["https://openalex.org/I170201317"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Peipei Zhou","raw_affiliation_strings":["University of Pittsburgh, Pittsburgh, USA"],"affiliations":[{"raw_affiliation_string":"University of Pittsburgh, Pittsburgh, USA","institution_ids":["https://openalex.org/I170201317"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5074853634"],"corresponding_institution_ids":["https://openalex.org/I170201317"],"apc_list":null,"apc_paid":null,"fwci":11.1005,"has_fulltext":true,"cited_by_count":23,"citation_normalized_percentile":{"value":0.99290073,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"55","last_page":"66"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8079665899276733},{"id":"https://openalex.org/keywords/dataflow","display_name":"Dataflow","score":0.7131770849227905},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.6658670902252197},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.6221333742141724},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.6197789907455444},{"id":"https://openalex.org/keywords/throughput","display_name":"Throughput","score":0.6018417477607727},{"id":"https://openalex.org/keywords/hardware-acceleration","display_name":"Hardware acceleration","score":0.5537470579147339},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.44828593730926514},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.42572319507598877},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.38996821641921997},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.35142821073532104},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.1234898567199707},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.09760400652885437}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8079665899276733},{"id":"https://openalex.org/C96324660","wikidata":"https://www.wikidata.org/wiki/Q205446","display_name":"Dataflow","level":2,"score":0.7131770849227905},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.6658670902252197},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.6221333742141724},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.6197789907455444},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.6018417477607727},{"id":"https://openalex.org/C13164978","wikidata":"https://www.wikidata.org/wiki/Q600158","display_name":"Hardware acceleration","level":3,"score":0.5537470579147339},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.44828593730926514},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.42572319507598877},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.38996821641921997},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.35142821073532104},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.1234898567199707},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.09760400652885437},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C555944384","wikidata":"https://www.wikidata.org/wiki/Q249","display_name":"Wireless","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3626202.3637569","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3626202.3637569","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3626202.3637569","source":null,"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 ACM/SIGDA International Symposium on Field Programmable Gate Arrays","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2401.10417","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2401.10417","pdf_url":"https://arxiv.org/pdf/2401.10417","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"doi:10.1145/3626202.3637569","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3626202.3637569","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3626202.3637569","source":null,"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 ACM/SIGDA International Symposium on Field Programmable Gate Arrays","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy","score":0.8899999856948853}],"awards":[{"id":"https://openalex.org/G8355257261","display_name":null,"funder_award_id":"2213701,2217003,2324864,2328972","funder_id":"https://openalex.org/F4320323817","funder_display_name":"Universitas Brawijaya"}],"funders":[{"id":"https://openalex.org/F4320323817","display_name":"Universitas Brawijaya","ror":"https://ror.org/01wk3d929"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4391125526.pdf","grobid_xml":"https://content.openalex.org/works/W4391125526.grobid-xml"},"referenced_works_count":51,"referenced_works":["https://openalex.org/W95608104","https://openalex.org/W2133156997","https://openalex.org/W2517617279","https://openalex.org/W2542189141","https://openalex.org/W2606722458","https://openalex.org/W2626696598","https://openalex.org/W2798534715","https://openalex.org/W2883929540","https://openalex.org/W2896983500","https://openalex.org/W2899915146","https://openalex.org/W2944647778","https://openalex.org/W2970971581","https://openalex.org/W2990138404","https://openalex.org/W3008905965","https://openalex.org/W3082569543","https://openalex.org/W3112948415","https://openalex.org/W3116489684","https://openalex.org/W3158233068","https://openalex.org/W3163275603","https://openalex.org/W3170874841","https://openalex.org/W3199934250","https://openalex.org/W3206749822","https://openalex.org/W3213528054","https://openalex.org/W4213446428","https://openalex.org/W4221163762","https://openalex.org/W4230315356","https://openalex.org/W4239385313","https://openalex.org/W4250503569","https://openalex.org/W4256629673","https://openalex.org/W4280489237","https://openalex.org/W4290858055","https://openalex.org/W4291333192","https://openalex.org/W4293024037","https://openalex.org/W4295312788","https://openalex.org/W4307652670","https://openalex.org/W4312121097","https://openalex.org/W4313913124","https://openalex.org/W4319870545","https://openalex.org/W4321637298","https://openalex.org/W4321637455","https://openalex.org/W4360831786","https://openalex.org/W4360831795","https://openalex.org/W4380881063","https://openalex.org/W4386763982","https://openalex.org/W4386977973","https://openalex.org/W4389156555","https://openalex.org/W4389162879","https://openalex.org/W4389166751","https://openalex.org/W4389166756","https://openalex.org/W4389470891","https://openalex.org/W4393145479"],"related_works":["https://openalex.org/W2999668243","https://openalex.org/W2518118925","https://openalex.org/W3159273459","https://openalex.org/W4319952061","https://openalex.org/W4280636456","https://openalex.org/W4388913998","https://openalex.org/W4310584535","https://openalex.org/W3154092384","https://openalex.org/W4295935044","https://openalex.org/W3159906349"],"abstract_inverted_index":{"With":[0],"the":[1,4,8,10,17,23,26,42,51,78,136,170,194,209,217],"increase":[2,50],"in":[3],"computation":[5,13,19],"intensity":[6],"of":[7,25,159],"chip,":[9],"mismatch":[11],"between":[12,89],"layer":[14],"shapes":[15],"and":[16,71,94,119,162,179,189,197,213],"available":[18],"resource":[20],"significantly":[21],"limits":[22],"utilization":[24],"chip.":[27],"Driven":[28],"by":[29],"this":[30,55],"observation,":[31],"prior":[32],"works":[33],"discuss":[34],"spatial":[35,46,115],"accelerators":[36,47,146],"or":[37],"dataflow":[38],"architecture":[39,117],"to":[40,124,143,169,227,230,233],"maximize":[41],"throughput.":[43],"However,":[44],"using":[45],"could":[48],"potentially":[49],"execution":[52,62,92],"latency.":[53],"In":[54],"work,":[56],"we":[57,80,113],"first":[58],"systematically":[59],"investigate":[60],"two":[61,91,97],"models:":[63],"(1)":[64],"sequentially":[65],"(temporally)":[66],"launch":[67,74],"one":[68],"monolithic":[69],"accelerator,":[70],"(2)":[72],"spatially":[73],"multiple":[75],"accelerators.":[76],"From":[77],"observations,":[79],"find":[81],"that":[82],"there":[83],"is":[84],"a":[85,103],"latency":[86,106,211,215],"throughput":[87,107,157,207,219],"tradeoff":[88],"these":[90,96],"models,":[93],"combining":[95],"strategies":[98,127],"together":[99,128],"can":[100],"give":[101],"us":[102],"more":[104],"efficient":[105],"Pareto":[108],"front.":[109],"To":[110],"achieve":[111,205],"this,":[112],"propose":[114],"sequential":[116],"(SSR)":[118],"SSR":[120,145,154,224,232],"design":[121],"automation":[122],"framework":[123],"explore":[125],"both":[126],"when":[129],"deploying":[130],"deep":[131,151],"learning":[132,152],"inference.":[133],"We":[134,221],"use":[135,223,231],"7nm":[137],"AMD":[138,176],"Versal":[139],"ACAP":[140],"VCK190":[141],"board":[142],"implement":[144],"for":[147],"four":[148],"end-to-end":[149],"transformer-based":[150],"models.":[153],"achieves":[155],"average":[156,182],"gains":[158,185],"2.53x,":[160],"35.71x,":[161],"14.20x":[163],"under":[164,208,216],"different":[165],"batch":[166],"sizes":[167],"compared":[168],"8nm":[171],"Nvidia":[172],"GPU":[173],"A10G,":[174],"16nm":[175],"FPGAs":[177],"ZCU102,":[178],"U250.":[180],"The":[181],"energy":[183],"efficiency":[184],"are":[186],"8.51x,":[187],"6.75x,":[188],"21.22x,":[190],"respectively.":[191],"Compared":[192],"with":[193],"sequential-only":[195],"solution":[196,199],"spatial-only":[198],"on":[200,236],"VCK190,":[201],"our":[202],"spatial-sequential-hybrid":[203],"solutions":[204,235],"higher":[206],"same":[210,218],"requirement":[212],"lower":[214],"requirement.":[220],"also":[222],"analytical":[225],"models":[226],"demonstrate":[228],"how":[229],"optimize":[234],"other":[237],"computing":[238],"platforms,":[239],"e.g.,":[240],"14nm":[241],"Intel":[242],"Stratix":[243],"10":[244],"NX.":[245]},"counts_by_year":[{"year":2025,"cited_by_count":18},{"year":2024,"cited_by_count":5}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2024-01-23T00:00:00"}
