{"id":"https://openalex.org/W2794298939","doi":"https://doi.org/10.1145/3174243.3174982","title":"Mapping Large-Scale DNNs on Asymmetric FPGAs","display_name":"Mapping Large-Scale DNNs on Asymmetric FPGAs","publication_year":2018,"publication_date":"2018-02-15","ids":{"openalex":"https://openalex.org/W2794298939","doi":"https://doi.org/10.1145/3174243.3174982","mag":"2794298939"},"language":"en","primary_location":{"id":"doi:10.1145/3174243.3174982","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3174243.3174982","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2018 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101968854","display_name":"Wentai Zhang","orcid":"https://orcid.org/0000-0003-1876-6242"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Wentai Zhang","raw_affiliation_strings":["Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029153042","display_name":"Jiaxi Zhang","orcid":"https://orcid.org/0000-0001-6177-8690"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiaxi Zhang","raw_affiliation_strings":["Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002840650","display_name":"Minghua Shen","orcid":"https://orcid.org/0000-0003-4747-8020"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Minghua Shen","raw_affiliation_strings":["Sun Yat-sen University, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"Sun Yat-sen University, Guangzhou, China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023506057","display_name":"Nong Xiao","orcid":"https://orcid.org/0000-0002-2166-977X"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Nong Xiao","raw_affiliation_strings":["Sun Yat-sen University, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"Sun Yat-sen University, Guangzhou, China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5023468643","display_name":"Guojie Luo","orcid":"https://orcid.org/0000-0003-4932-3655"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guojie Luo","raw_affiliation_strings":["Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5101968854"],"corresponding_institution_ids":["https://openalex.org/I20231570"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.02370603,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"291","last_page":"291"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.9939000010490417,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.8573944568634033},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7859774827957153},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.5147983431816101},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.5059654116630554},{"id":"https://openalex.org/keywords/reuse","display_name":"Reuse","score":0.480588436126709},{"id":"https://openalex.org/keywords/bandwidth","display_name":"Bandwidth (computing)","score":0.47810500860214233},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.42790257930755615},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.39088311791419983},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.38988131284713745},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.3425614833831787},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.1993863880634308}],"concepts":[{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.8573944568634033},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7859774827957153},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.5147983431816101},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.5059654116630554},{"id":"https://openalex.org/C206588197","wikidata":"https://www.wikidata.org/wiki/Q846574","display_name":"Reuse","level":2,"score":0.480588436126709},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.47810500860214233},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.42790257930755615},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.39088311791419983},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.38988131284713745},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3425614833831787},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.1993863880634308},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3174243.3174982","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3174243.3174982","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2018 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Decent work and economic growth","score":0.4300000071525574,"id":"https://metadata.un.org/sdg/8"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2111241003","https://openalex.org/W2384475851","https://openalex.org/W2000444236","https://openalex.org/W2355315220","https://openalex.org/W4200391368","https://openalex.org/W2210979487","https://openalex.org/W2074043759","https://openalex.org/W1967938402","https://openalex.org/W2386041993","https://openalex.org/W1608572506"],"abstract_inverted_index":{"FPGAs":[0,109,144],"are":[1,110],"very":[2,25],"attractive":[3],"to":[4,28,53,66,94,113,117,145],"accelerate":[5],"the":[6,54,74,101,115,119,123,135,153],"deep":[7],"neural":[8,75],"networks":[9],"(DNNs).":[10],"While":[11],"single-FPGA":[12,56],"can":[13,79],"provide":[14,146],"good":[15],"performance":[16,120],"for":[17,21,43,59,68],"small-scale":[18],"DNNs,":[19,61],"support":[20],"large-scale":[22,45,60,136],"DNNs":[23,46],"is":[24,92],"limited":[26],"due":[27],"they":[29],"require":[30],"higher":[31,69],"resource":[32,57,84],"demand.":[33],"In":[34,71],"this":[35,72,96],"paper,":[36],"we":[37,62],"propose":[38],"an":[39,48,147],"efficient":[40],"mapping":[41,77],"approach":[42,141],"accelerating":[44],"on":[47],"asymmetric":[49],"multi-FPGA":[50,64],"architecture.":[51],"Relative":[52],"state-of-the-art":[55,154],"reuse":[58],"consider":[63],"fashion":[65],"strive":[67],"performance.":[70],"fashion,":[73],"network":[76,102],"problem":[78,97],"be":[80],"formulated":[81],"as":[82],"a":[83,88,129],"allocation":[85],"problem,":[86],"and":[87,104],"dynamic":[89],"programming-based":[90],"partitioning":[91,116],"designed":[93],"solve":[95],"optimally.":[98],"Notice":[99],"that":[100,139],"topology":[103],"communication":[105],"bandwidth":[106],"of":[107,125,149],"multiple":[108],"always":[111],"used":[112],"guide":[114],"boost":[118],"while":[121],"satisfying":[122],"constraints":[124],"resource-performance":[126],"trade-off":[127],"in":[128],"single":[130],"FPGA.":[131],"Experimental":[132],"results":[133],"using":[134],"ResNet-152":[137],"demonstrate":[138],"our":[140],"deploys":[142],"sixteen":[143],"advantage":[148],"16.4x":[150],"GOPS":[151],"over":[152],"work.":[155]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
