{"id":"https://openalex.org/W2094786337","doi":"https://doi.org/10.1145/2400682.2400684","title":"A performance and energy comparison of convolution on GPUs, FPGAs, and multicore processors","display_name":"A performance and energy comparison of convolution on GPUs, FPGAs, and multicore processors","publication_year":2013,"publication_date":"2013-01-01","ids":{"openalex":"https://openalex.org/W2094786337","doi":"https://doi.org/10.1145/2400682.2400684","mag":"2094786337"},"language":"en","primary_location":{"id":"doi:10.1145/2400682.2400684","is_oa":true,"landing_page_url":"https://doi.org/10.1145/2400682.2400684","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/2400682.2400684","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://dl.acm.org/doi/pdf/10.1145/2400682.2400684","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5016246137","display_name":"Jeremy Fowers","orcid":null},"institutions":[{"id":"https://openalex.org/I33213144","display_name":"University of Florida","ror":"https://ror.org/02y3ad647","country_code":"US","type":"education","lineage":["https://openalex.org/I33213144"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Jeremy Fowers","raw_affiliation_strings":["University of Florida","[University of Florida]"],"affiliations":[{"raw_affiliation_string":"University of Florida","institution_ids":["https://openalex.org/I33213144"]},{"raw_affiliation_string":"[University of Florida]","institution_ids":["https://openalex.org/I33213144"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087136024","display_name":"G.R. Brown","orcid":null},"institutions":[{"id":"https://openalex.org/I33213144","display_name":"University of Florida","ror":"https://ror.org/02y3ad647","country_code":"US","type":"education","lineage":["https://openalex.org/I33213144"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Greg Brown","raw_affiliation_strings":["University of Florida","[University of Florida]"],"affiliations":[{"raw_affiliation_string":"University of Florida","institution_ids":["https://openalex.org/I33213144"]},{"raw_affiliation_string":"[University of Florida]","institution_ids":["https://openalex.org/I33213144"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026857376","display_name":"John Wernsing","orcid":null},"institutions":[{"id":"https://openalex.org/I33213144","display_name":"University of Florida","ror":"https://ror.org/02y3ad647","country_code":"US","type":"education","lineage":["https://openalex.org/I33213144"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"John Wernsing","raw_affiliation_strings":["University of Florida","[University of Florida]"],"affiliations":[{"raw_affiliation_string":"University of Florida","institution_ids":["https://openalex.org/I33213144"]},{"raw_affiliation_string":"[University of Florida]","institution_ids":["https://openalex.org/I33213144"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5088031457","display_name":"Greg Stitt","orcid":"https://orcid.org/0000-0001-7159-7439"},"institutions":[{"id":"https://openalex.org/I33213144","display_name":"University of Florida","ror":"https://ror.org/02y3ad647","country_code":"US","type":"education","lineage":["https://openalex.org/I33213144"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Greg Stitt","raw_affiliation_strings":["University of Florida","[University of Florida]"],"affiliations":[{"raw_affiliation_string":"University of Florida","institution_ids":["https://openalex.org/I33213144"]},{"raw_affiliation_string":"[University of Florida]","institution_ids":["https://openalex.org/I33213144"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5016246137"],"corresponding_institution_ids":["https://openalex.org/I33213144"],"apc_list":null,"apc_paid":null,"fwci":5.0744,"has_fulltext":true,"cited_by_count":34,"citation_normalized_percentile":{"value":0.95341027,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"9","issue":"4","first_page":"1","last_page":"21"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8447515964508057},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.6811845898628235},{"id":"https://openalex.org/keywords/graphics","display_name":"Graphics","score":0.6216182708740234},{"id":"https://openalex.org/keywords/convolution","display_name":"Convolution (computer science)","score":0.580859899520874},{"id":"https://openalex.org/keywords/multi-core-processor","display_name":"Multi-core processor","score":0.5358449220657349},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.5193477869033813},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.5030328631401062},{"id":"https://openalex.org/keywords/design-space-exploration","display_name":"Design space exploration","score":0.4966464638710022},{"id":"https://openalex.org/keywords/energy","display_name":"Energy (signal processing)","score":0.47951725125312805},{"id":"https://openalex.org/keywords/efficient-energy-use","display_name":"Efficient energy use","score":0.4764302670955658},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.4693518579006195},{"id":"https://openalex.org/keywords/parallelism","display_name":"Parallelism (grammar)","score":0.4674433469772339},{"id":"https://openalex.org/keywords/signal-processing","display_name":"Signal processing","score":0.41288357973098755},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.41055917739868164},{"id":"https://openalex.org/keywords/digital-signal-processing","display_name":"Digital signal processing","score":0.3840673565864563},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.3803381323814392},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.3493896424770355},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.14050936698913574},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.11399012804031372}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8447515964508057},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.6811845898628235},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.6216182708740234},{"id":"https://openalex.org/C45347329","wikidata":"https://www.wikidata.org/wiki/Q5166604","display_name":"Convolution (computer science)","level":3,"score":0.580859899520874},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.5358449220657349},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.5193477869033813},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.5030328631401062},{"id":"https://openalex.org/C2776221188","wikidata":"https://www.wikidata.org/wiki/Q21072556","display_name":"Design space exploration","level":2,"score":0.4966464638710022},{"id":"https://openalex.org/C186370098","wikidata":"https://www.wikidata.org/wiki/Q442787","display_name":"Energy (signal processing)","level":2,"score":0.47951725125312805},{"id":"https://openalex.org/C2742236","wikidata":"https://www.wikidata.org/wiki/Q924713","display_name":"Efficient energy use","level":2,"score":0.4764302670955658},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.4693518579006195},{"id":"https://openalex.org/C2781172179","wikidata":"https://www.wikidata.org/wiki/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.4674433469772339},{"id":"https://openalex.org/C104267543","wikidata":"https://www.wikidata.org/wiki/Q208163","display_name":"Signal processing","level":3,"score":0.41288357973098755},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.41055917739868164},{"id":"https://openalex.org/C84462506","wikidata":"https://www.wikidata.org/wiki/Q173142","display_name":"Digital signal processing","level":2,"score":0.3840673565864563},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.3803381323814392},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.3493896424770355},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.14050936698913574},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.11399012804031372},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2400682.2400684","is_oa":true,"landing_page_url":"https://doi.org/10.1145/2400682.2400684","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/2400682.2400684","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/2400682.2400684","is_oa":true,"landing_page_url":"https://doi.org/10.1145/2400682.2400684","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/2400682.2400684","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Affordable and clean energy","score":0.75,"id":"https://metadata.un.org/sdg/7"}],"awards":[{"id":"https://openalex.org/G6621417764","display_name":"CSR: Small: Elastic Computing - An Enabling Technology for Transparent, Portable, and Adaptive Multi-Core Heterogeneous Computing","funder_award_id":"0914474","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G7983578312","display_name":null,"funder_award_id":"CNS-0914474","funder_id":"https://openalex.org/F4320337388","funder_display_name":"Division of Computer and Network Systems"},{"id":"https://openalex.org/G848032724","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320337388","display_name":"Division of Computer and Network Systems","ror":"https://ror.org/02rdzmk74"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2094786337.pdf","grobid_xml":"https://content.openalex.org/works/W2094786337.grobid-xml"},"referenced_works_count":28,"referenced_works":["https://openalex.org/W101494604","https://openalex.org/W1553214226","https://openalex.org/W1583432580","https://openalex.org/W1966360493","https://openalex.org/W1987873989","https://openalex.org/W2022020111","https://openalex.org/W2043815169","https://openalex.org/W2059536715","https://openalex.org/W2068371654","https://openalex.org/W2076186064","https://openalex.org/W2101620555","https://openalex.org/W2102182691","https://openalex.org/W2111953042","https://openalex.org/W2113755305","https://openalex.org/W2126256127","https://openalex.org/W2142401087","https://openalex.org/W2144947977","https://openalex.org/W2147903032","https://openalex.org/W2154991996","https://openalex.org/W2157749802","https://openalex.org/W2160291305","https://openalex.org/W2161761794","https://openalex.org/W2161835356","https://openalex.org/W2295862081","https://openalex.org/W2613360538","https://openalex.org/W4214544044","https://openalex.org/W4235881779","https://openalex.org/W4250981202"],"related_works":["https://openalex.org/W3062287","https://openalex.org/W2380390332","https://openalex.org/W2742145873","https://openalex.org/W4245975140","https://openalex.org/W2062253548","https://openalex.org/W4225414539","https://openalex.org/W4289522463","https://openalex.org/W1977763331","https://openalex.org/W4318483369","https://openalex.org/W2002560966"],"abstract_inverted_index":{"Recent":[0],"architectural":[1],"trends":[2],"have":[3,26],"focused":[4],"on":[5,72],"increased":[6,12],"parallelism":[7],"via":[8,14],"multicore":[9],"processors":[10],"and":[11,29,76,113,123],"heterogeneity":[13],"accelerator":[15,42],"devices":[16],"(e.g.,":[17],"graphics-processing":[18,74],"units,":[19,75],"field-programmable":[20,77],"gate":[21,78],"arrays).":[22],"Although":[23],"these":[24],"architectures":[25],"significant":[27],"performance":[28,122],"energy":[30],"potential,":[31],"application":[32,83],"designers":[33,95],"face":[34],"many":[35,81],"device-specific":[36],"challenges":[37],"when":[38,44],"choosing":[39],"an":[40,46,49,90],"appropriate":[41],"or":[43],"customizing":[45],"algorithm":[47],"for":[48,100],"accelerator.":[50],"To":[51],"help":[52],"address":[53],"this":[54,57,92],"problem,":[55],"in":[56,69],"article":[58,93],"we":[59],"thoroughly":[60],"evaluate":[61,85],"convolution,":[62],"one":[63],"of":[64,89,107],"the":[65],"most":[66],"common":[67],"operations":[68],"digital-signal":[70],"processing,":[71],"multicores,":[73],"arrays.":[79],"Whereas":[80],"previous":[82],"studies":[84],"a":[86],"specific":[87],"usage":[88],"application,":[91],"assists":[94],"with":[96],"design":[97],"space":[98],"exploration":[99],"numerous":[101],"use":[102],"cases":[103],"by":[104],"analyzing":[105],"effects":[106],"different":[108,111,114],"input":[109],"sizes,":[110],"algorithms,":[112],"devices,":[115],"while":[116],"also":[117],"determining":[118],"Pareto-optimal":[119],"trade-offs":[120],"between":[121],"energy.":[124]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":7},{"year":2017,"cited_by_count":3},{"year":2016,"cited_by_count":3},{"year":2015,"cited_by_count":5},{"year":2014,"cited_by_count":7},{"year":2013,"cited_by_count":1}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
