{"id":"https://openalex.org/W4395106433","doi":"https://doi.org/10.1145/3620666.3651347","title":"TAPA-CS: Enabling Scalable Accelerator Design on Distributed HBM-FPGAs","display_name":"TAPA-CS: Enabling Scalable Accelerator Design on Distributed HBM-FPGAs","publication_year":2024,"publication_date":"2024-04-24","ids":{"openalex":"https://openalex.org/W4395106433","doi":"https://doi.org/10.1145/3620666.3651347"},"language":"en","primary_location":{"id":"doi:10.1145/3620666.3651347","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3620666.3651347","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 3","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3620666.3651347","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5092439424","display_name":"Neha Prakriya","orcid":"https://orcid.org/0000-0002-4866-0425"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Neha Prakriya","raw_affiliation_strings":["UCLA, Los Angeles, United States of America"],"raw_orcid":"https://orcid.org/0000-0002-4866-0425","affiliations":[{"raw_affiliation_string":"UCLA, Los Angeles, United States of America","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074835029","display_name":"Yuze Chi","orcid":null},"institutions":[{"id":"https://openalex.org/I2799798094","display_name":"UCLA Health","ror":"https://ror.org/01d88se56","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I2799798094"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yuze Chi","raw_affiliation_strings":["UCLA, Los Angeles, USA"],"raw_orcid":"https://orcid.org/0000-0002-5885-0425","affiliations":[{"raw_affiliation_string":"UCLA, Los Angeles, USA","institution_ids":["https://openalex.org/I2799798094"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065485432","display_name":"Suhail Basalama","orcid":"https://orcid.org/0000-0002-8301-8411"},"institutions":[{"id":"https://openalex.org/I2799798094","display_name":"UCLA Health","ror":"https://ror.org/01d88se56","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I2799798094"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Suhail Basalama","raw_affiliation_strings":["UCLA, Los Angeles, USA"],"raw_orcid":"https://orcid.org/0000-0002-8301-8411","affiliations":[{"raw_affiliation_string":"UCLA, Los Angeles, USA","institution_ids":["https://openalex.org/I2799798094"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036239104","display_name":"Linghao Song","orcid":"https://orcid.org/0000-0002-7450-2842"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Linghao Song","raw_affiliation_strings":["UCLA, Los Angeles, United States of America"],"raw_orcid":"https://orcid.org/0000-0002-7450-2842","affiliations":[{"raw_affiliation_string":"UCLA, Los Angeles, United States of America","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5016776689","display_name":"Jason Cong","orcid":"https://orcid.org/0000-0003-2887-6963"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jason Cong","raw_affiliation_strings":["UCLA, Los Angeles, United States of America"],"raw_orcid":"https://orcid.org/0000-0003-2887-6963","affiliations":[{"raw_affiliation_string":"UCLA, Los Angeles, United States of America","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5092439424"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.3456,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.80773017,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"966","last_page":"980"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.8002185821533203},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7641698122024536},{"id":"https://openalex.org/keywords/dataflow","display_name":"Dataflow","score":0.5922484397888184},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5284818410873413},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.4545549154281616},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.44640254974365234},{"id":"https://openalex.org/keywords/stratix","display_name":"Stratix","score":0.4206509590148926},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.3771210014820099},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.33856451511383057},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.1271384358406067}],"concepts":[{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.8002185821533203},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7641698122024536},{"id":"https://openalex.org/C96324660","wikidata":"https://www.wikidata.org/wiki/Q205446","display_name":"Dataflow","level":2,"score":0.5922484397888184},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5284818410873413},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.4545549154281616},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.44640254974365234},{"id":"https://openalex.org/C2776277307","wikidata":"https://www.wikidata.org/wiki/Q22074755","display_name":"Stratix","level":3,"score":0.4206509590148926},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.3771210014820099},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.33856451511383057},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.1271384358406067}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3620666.3651347","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3620666.3651347","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 3","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3620666.3651347","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3620666.3651347","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 3","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure","score":0.6299999952316284}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":43,"referenced_works":["https://openalex.org/W1501488688","https://openalex.org/W1686810756","https://openalex.org/W1940012472","https://openalex.org/W2038509324","https://openalex.org/W2060736133","https://openalex.org/W2091158003","https://openalex.org/W2116433835","https://openalex.org/W2127298482","https://openalex.org/W2136058988","https://openalex.org/W2147575032","https://openalex.org/W2160566592","https://openalex.org/W2475840367","https://openalex.org/W2741581557","https://openalex.org/W2788854694","https://openalex.org/W2798956872","https://openalex.org/W2883929540","https://openalex.org/W2884267664","https://openalex.org/W2891946740","https://openalex.org/W2901123440","https://openalex.org/W2945306514","https://openalex.org/W2962953210","https://openalex.org/W3034991763","https://openalex.org/W3035177949","https://openalex.org/W3087961256","https://openalex.org/W3105234421","https://openalex.org/W3127969131","https://openalex.org/W3133347161","https://openalex.org/W3133395503","https://openalex.org/W3156546842","https://openalex.org/W3162360055","https://openalex.org/W3188178661","https://openalex.org/W3188919536","https://openalex.org/W3197264354","https://openalex.org/W4200023416","https://openalex.org/W4206396251","https://openalex.org/W4240110784","https://openalex.org/W4288086140","https://openalex.org/W4293155667","https://openalex.org/W4297649576","https://openalex.org/W4318541659","https://openalex.org/W6639055396","https://openalex.org/W6729596344","https://openalex.org/W6785377221"],"related_works":["https://openalex.org/W1509155667","https://openalex.org/W2999668243","https://openalex.org/W2518118925","https://openalex.org/W3208151864","https://openalex.org/W1564576805","https://openalex.org/W4254372399","https://openalex.org/W1998888015","https://openalex.org/W1967938402","https://openalex.org/W3145068070","https://openalex.org/W2386041993"],"abstract_inverted_index":{"Despite":[0],"the":[1,97,125,149,172,185],"increasing":[2],"adoption":[3],"of":[4,53,112,151],"FPGAs":[5,25,54,134],"in":[6,14,135],"compute":[7],"clouds,":[8],"there":[9],"remains":[10],"a":[11,37,47,51,91,141,196],"significant":[12],"gap":[13],"programming":[15,40],"tools":[16],"and":[17,32,45,59,84,110,114,127,168,175,181],"abstractions":[18],"which":[19,42,72],"can":[20],"leverage":[21,76],"network-connected,":[22],"cloud-scale,":[23],"multi-die":[24],"to":[26,75,99,101,130],"generate":[27],"accelerators":[28],"with":[29,121,202],"high":[30,57,132],"frequency":[31,58,197],"throughput.":[33,60],"We":[34,146],"propose":[35],"TAPA-CS,":[36],"task-parallel":[38],"dataflow":[39],"framework":[41,71],"automatically":[43,95],"partitions":[44,96],"compiles":[46],"large":[48,92],"design":[49,98],"across":[50],"cluster":[52],"while":[55,104],"achieving":[56],"TAPA-CS":[61,94,117,152,193],"has":[62],"three":[63],"main":[64],"contributions.":[65],"First,":[66],"it":[67],"is":[68],"an":[69],"open-source":[70],"allows":[73],"users":[74],"virtually":[77],"\"unlimited\"":[78],"accelerator":[79],"fabric,":[80],"high-bandwidth":[81],"memory":[82],"(HBM),":[83],"on-chip":[85],"memory.":[86],"Second,":[87],"given":[88],"as":[89,163],"input":[90],"design,":[93],"map":[100],"multiple":[102],"FPGAs,":[103],"ensuring":[105],"congestion":[106],"control,":[107],"resource":[108],"balancing,":[109],"overlapping":[111],"communication":[113],"computation.":[115],"Third,":[116],"couples":[118],"coarse-grained":[119],"floor-planning":[120],"interconnect":[122],"pipelining":[123],"at":[124],"inter-":[126],"intra-FPGA":[128],"levels":[129],"ensure":[131],"frequency.":[133],"our":[136],"multi-FPGA":[137],"testbed":[138],"communicate":[139],"through":[140,190],"high-speed":[142],"100Gbps":[143],"Ethernet":[144],"infrastructure.":[145],"have":[147],"evaluated":[148],"performance":[150],"on":[153],"designs,":[154],"including":[155],"systolic-array":[156],"based":[157],"CNNs,":[158],"graph":[159],"processing":[160],"workloads":[161],"such":[162],"page":[164],"rank,":[165],"stencil":[166],"applications,":[167],"KNN.":[169],"On":[170],"average,":[171],"2-,":[173],"3-,":[174],"4-FPGA":[176],"designs":[177],"are":[178],"2.1\u00d7,":[179],"3.2\u00d7,":[180],"4.4\u00d7":[182],"faster":[183],"than":[184],"single":[186],"FPGA":[187],"baselines":[188],"generated":[189],"Vitis":[191,203],"HLS.":[192,204],"also":[194],"achieves":[195],"improvement":[198],"between":[199],"11%-116%":[200],"compared":[201]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
