{"id":"https://openalex.org/W2937249639","doi":"https://doi.org/10.1145/3300053.3319418","title":"Detailed Characterization of Deep Neural Networks on GPUs and FPGAs","display_name":"Detailed Characterization of Deep Neural Networks on GPUs and FPGAs","publication_year":2019,"publication_date":"2019-04-10","ids":{"openalex":"https://openalex.org/W2937249639","doi":"https://doi.org/10.1145/3300053.3319418","mag":"2937249639"},"language":"en","primary_location":{"id":"doi:10.1145/3300053.3319418","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3300053.3319418","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 12th Workshop on General Purpose Processing Using GPUs","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5082175708","display_name":"Aajna Karki","orcid":null},"institutions":[{"id":"https://openalex.org/I51504820","display_name":"San Jose State University","ror":"https://ror.org/04qyvz380","country_code":"US","type":"education","lineage":["https://openalex.org/I51504820"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Aajna Karki","raw_affiliation_strings":["Computer Engineering Department, San Jos\u00e9 State University, San Jos\u00e9, CA, USA"],"affiliations":[{"raw_affiliation_string":"Computer Engineering Department, San Jos\u00e9 State University, San Jos\u00e9, CA, USA","institution_ids":["https://openalex.org/I51504820"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029870539","display_name":"Chethan Palangotu Keshava","orcid":null},"institutions":[{"id":"https://openalex.org/I51504820","display_name":"San Jose State University","ror":"https://ror.org/04qyvz380","country_code":"US","type":"education","lineage":["https://openalex.org/I51504820"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chethan Palangotu Keshava","raw_affiliation_strings":["Computer Engineering Department, San Jos\u00e9 State University, San Jos\u00e9, CA, USA"],"affiliations":[{"raw_affiliation_string":"Computer Engineering Department, San Jos\u00e9 State University, San Jos\u00e9, CA, USA","institution_ids":["https://openalex.org/I51504820"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074963559","display_name":"Spoorthi Mysore Shivakumar","orcid":null},"institutions":[{"id":"https://openalex.org/I51504820","display_name":"San Jose State University","ror":"https://ror.org/04qyvz380","country_code":"US","type":"education","lineage":["https://openalex.org/I51504820"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Spoorthi Mysore Shivakumar","raw_affiliation_strings":["Computer Engineering Department, San Jos\u00e9 State University, San Jos\u00e9, CA, USA"],"affiliations":[{"raw_affiliation_string":"Computer Engineering Department, San Jos\u00e9 State University, San Jos\u00e9, CA, USA","institution_ids":["https://openalex.org/I51504820"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068155003","display_name":"Joshua Skow","orcid":null},"institutions":[{"id":"https://openalex.org/I51504820","display_name":"San Jose State University","ror":"https://ror.org/04qyvz380","country_code":"US","type":"education","lineage":["https://openalex.org/I51504820"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Joshua Skow","raw_affiliation_strings":["Computer Engineering Department, San Jos\u00e9 State University, San Jos\u00e9, CA, USA"],"affiliations":[{"raw_affiliation_string":"Computer Engineering Department, San Jos\u00e9 State University, San Jos\u00e9, CA, USA","institution_ids":["https://openalex.org/I51504820"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043654571","display_name":"Goutam Madhukeshwar Hegde","orcid":null},"institutions":[{"id":"https://openalex.org/I51504820","display_name":"San Jose State University","ror":"https://ror.org/04qyvz380","country_code":"US","type":"education","lineage":["https://openalex.org/I51504820"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Goutam Madhukeshwar Hegde","raw_affiliation_strings":["Computer Engineering Department, San Jos\u00e9 State University, San Jos\u00e9, CA, USA"],"affiliations":[{"raw_affiliation_string":"Computer Engineering Department, San Jos\u00e9 State University, San Jos\u00e9, CA, USA","institution_ids":["https://openalex.org/I51504820"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5081531196","display_name":"Hyeran Jeon","orcid":"https://orcid.org/0000-0002-1767-8198"},"institutions":[{"id":"https://openalex.org/I51504820","display_name":"San Jose State University","ror":"https://ror.org/04qyvz380","country_code":"US","type":"education","lineage":["https://openalex.org/I51504820"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hyeran Jeon","raw_affiliation_strings":["Computer Engineering Department, San Jos\u00e9 State University, San Jos\u00e9, CA, USA"],"affiliations":[{"raw_affiliation_string":"Computer Engineering Department, San Jos\u00e9 State University, San Jos\u00e9, CA, USA","institution_ids":["https://openalex.org/I51504820"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5082175708"],"corresponding_institution_ids":["https://openalex.org/I51504820"],"apc_list":null,"apc_paid":null,"fwci":3.8523,"has_fulltext":false,"cited_by_count":27,"citation_normalized_percentile":{"value":0.93816854,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"12","last_page":"21"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.8579073548316956},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8315843939781189},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.7449599504470825},{"id":"https://openalex.org/keywords/suite","display_name":"Suite","score":0.7100018858909607},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6007120609283447},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.593864381313324},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5611917972564697},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.5519400238990784},{"id":"https://openalex.org/keywords/convolution","display_name":"Convolution (computer science)","score":0.48493149876594543},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.4825558066368103},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.4556657373905182},{"id":"https://openalex.org/keywords/mobile-device","display_name":"Mobile device","score":0.41775667667388916},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.33803635835647583},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.33455103635787964},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.3224409818649292},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2771356701850891},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.23247060179710388}],"concepts":[{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.8579073548316956},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8315843939781189},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.7449599504470825},{"id":"https://openalex.org/C79581498","wikidata":"https://www.wikidata.org/wiki/Q1367530","display_name":"Suite","level":2,"score":0.7100018858909607},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6007120609283447},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.593864381313324},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5611917972564697},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.5519400238990784},{"id":"https://openalex.org/C45347329","wikidata":"https://www.wikidata.org/wiki/Q5166604","display_name":"Convolution (computer science)","level":3,"score":0.48493149876594543},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.4825558066368103},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.4556657373905182},{"id":"https://openalex.org/C186967261","wikidata":"https://www.wikidata.org/wiki/Q5082128","display_name":"Mobile device","level":2,"score":0.41775667667388916},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.33803635835647583},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.33455103635787964},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.3224409818649292},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2771356701850891},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.23247060179710388},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3300053.3319418","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3300053.3319418","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 12th Workshop on General Purpose Processing Using GPUs","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W35664875","https://openalex.org/W1508856789","https://openalex.org/W1686810756","https://openalex.org/W1871050032","https://openalex.org/W1979527452","https://openalex.org/W2018658595","https://openalex.org/W2080592089","https://openalex.org/W2093043622","https://openalex.org/W2108598243","https://openalex.org/W2117539524","https://openalex.org/W2136848157","https://openalex.org/W2157331557","https://openalex.org/W2163605009","https://openalex.org/W2194775991","https://openalex.org/W2238992335","https://openalex.org/W2279098554","https://openalex.org/W2319920447","https://openalex.org/W2515080096","https://openalex.org/W2587914027","https://openalex.org/W2612445135","https://openalex.org/W2618530766","https://openalex.org/W2790501674","https://openalex.org/W2888727064","https://openalex.org/W2962835968","https://openalex.org/W2996489182","https://openalex.org/W3118608800","https://openalex.org/W4298875270","https://openalex.org/W4394670654","https://openalex.org/W6655132393","https://openalex.org/W6676297131","https://openalex.org/W6687483927","https://openalex.org/W6891821539"],"related_works":["https://openalex.org/W3062287","https://openalex.org/W2380390332","https://openalex.org/W2742145873","https://openalex.org/W4245975140","https://openalex.org/W2062253548","https://openalex.org/W4225414539","https://openalex.org/W4231704780","https://openalex.org/W4289522463","https://openalex.org/W1977763331","https://openalex.org/W4318483369"],"abstract_inverted_index":{"Deep":[0],"neural":[1,119,124],"networks":[2,120,133],"(DNNs)":[3],"have":[4,26,40],"been":[5,41],"proving":[6],"the":[7,67,77,113],"effectiveness":[8],"in":[9],"various":[10],"computing":[11,17],"fields.":[12],"To":[13,85],"provide":[14,86,127],"more":[15,88],"efficient":[16],"platforms":[18],"for":[19],"DNN":[20,37,51,55,96],"applications,":[21],"it":[22],"is":[23],"essential":[24],"to":[25,48],"evaluation":[27,90],"environments":[28],"that":[29,100,106],"include":[30],"assorted":[31],"benchmark":[32,38,68,97],"workloads.":[33],"Though":[34],"a":[35,72,87,94,141,144,147],"few":[36,73],"suites":[39,69],"recently":[42],"released,":[43],"most":[44,114],"of":[45,66,131],"them":[46,136],"require":[47],"install":[49],"proprietary":[50],"libraries":[52],"or":[53],"resource-intensive":[54],"frameworks,":[56],"which":[57],"can":[58,81,101],"run":[59,102],"only":[60,70],"on":[61,103,137],"certain":[62],"architectures.":[63],"Also,":[64],"some":[65],"support":[71],"per-layer":[74],"functions":[75],"where":[76],"interactions":[78],"between":[79],"layers":[80],"not":[82],"be":[83],"measured.":[84],"scalable":[89],"environment,":[91],"we":[92],"present":[93],"new":[95],"suite,":[98],"Tango,":[99],"any":[104],"platform":[105],"supports":[107],"CUDA":[108],"and":[109,121,143,146],"OpenCL.":[110],"Tango":[111],"includes":[112],"widely":[115],"used":[116],"five":[117],"convolution":[118],"two":[122],"recurrent":[123],"networks.":[125],"We":[126],"in-depth":[128],"architectural":[129],"statistics":[130],"these":[132],"while":[134],"running":[135],"an":[138],"architecture":[139],"simulator,":[140],"server-":[142],"mobile-GPU,":[145],"mobile":[148],"FPGA.":[149]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":5},{"year":2020,"cited_by_count":5},{"year":2019,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
