{"id":"https://openalex.org/W2978376504","doi":"https://doi.org/10.1145/3394116","title":"GPU Fast Convolution via the Overlap-and-Save Method in Shared Memory","display_name":"GPU Fast Convolution via the Overlap-and-Save Method in Shared Memory","publication_year":2020,"publication_date":"2020-07-07","ids":{"openalex":"https://openalex.org/W2978376504","doi":"https://doi.org/10.1145/3394116","mag":"2978376504"},"language":"en","primary_location":{"id":"doi:10.1145/3394116","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3394116","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3394116","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3394116","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5050123367","display_name":"Karel Ad\u00e1mek","orcid":"https://orcid.org/0000-0003-2797-0595"},"institutions":[{"id":"https://openalex.org/I40120149","display_name":"University of Oxford","ror":"https://ror.org/052gg0110","country_code":"GB","type":"education","lineage":["https://openalex.org/I40120149"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Karel Ad\u00e1mek","raw_affiliation_strings":["Oxford e-Research Centre, Department of Engineering Science, University of Oxford, Oxford, United Kingdom"],"raw_orcid":"https://orcid.org/0000-0003-2797-0595","affiliations":[{"raw_affiliation_string":"Oxford e-Research Centre, Department of Engineering Science, University of Oxford, Oxford, United Kingdom","institution_ids":["https://openalex.org/I40120149"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041971647","display_name":"Sofia Dimoudi","orcid":"https://orcid.org/0000-0002-0967-1332"},"institutions":[{"id":"https://openalex.org/I190082696","display_name":"Durham University","ror":"https://ror.org/01v29qb04","country_code":"GB","type":"education","lineage":["https://openalex.org/I190082696"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Sofia Dimoudi","raw_affiliation_strings":["Centre for Advanced Instrumentation, Durham University, Durham, United Kingdom"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Centre for Advanced Instrumentation, Durham University, Durham, United Kingdom","institution_ids":["https://openalex.org/I190082696"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Mike Giles","orcid":null},"institutions":[{"id":"https://openalex.org/I40120149","display_name":"University of Oxford","ror":"https://ror.org/052gg0110","country_code":"GB","type":"education","lineage":["https://openalex.org/I40120149"]},{"id":"https://openalex.org/I4210120676","display_name":"Mathematical Institute of the Slovak Academy of Sciences","ror":"https://ror.org/02fhy7464","country_code":"SK","type":"facility","lineage":["https://openalex.org/I207624831","https://openalex.org/I4210120676"]}],"countries":["GB","SK"],"is_corresponding":false,"raw_author_name":"Mike Giles","raw_affiliation_strings":["Mathematical Institute, University of Oxford, Oxford, United Kingdom","MATHEMATICAL INSTITUTE; UNIVERSITY OF OXFORD; OXFORD UNITED KINGDOM"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Mathematical Institute, University of Oxford, Oxford, United Kingdom","institution_ids":["https://openalex.org/I40120149"]},{"raw_affiliation_string":"MATHEMATICAL INSTITUTE; UNIVERSITY OF OXFORD; OXFORD UNITED KINGDOM","institution_ids":["https://openalex.org/I4210120676"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5080149070","display_name":"Wesley Armour","orcid":"https://orcid.org/0000-0003-1756-3064"},"institutions":[{"id":"https://openalex.org/I40120149","display_name":"University of Oxford","ror":"https://ror.org/052gg0110","country_code":"GB","type":"education","lineage":["https://openalex.org/I40120149"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Wesley Armour","raw_affiliation_strings":["Oxford e-Research Centre, Department of Engineering Science, University of Oxford, Oxford, United Kingdom","University of Oxford"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Oxford e-Research Centre, Department of Engineering Science, University of Oxford, Oxford, United Kingdom","institution_ids":["https://openalex.org/I40120149"]},{"raw_affiliation_string":"University of Oxford","institution_ids":["https://openalex.org/I40120149"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5050123367"],"corresponding_institution_ids":["https://openalex.org/I40120149"],"apc_list":null,"apc_paid":null,"fwci":0.3039,"has_fulltext":true,"cited_by_count":4,"citation_normalized_percentile":{"value":0.50818816,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":"17","issue":"3","first_page":"1","last_page":"20"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11034","display_name":"Digital Filter Design and Implementation","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11034","display_name":"Digital Filter Design and Implementation","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11697","display_name":"Numerical Methods and Algorithms","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10688","display_name":"Image and Signal Denoising Methods","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8483790159225464},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.8057626485824585},{"id":"https://openalex.org/keywords/fast-fourier-transform","display_name":"Fast Fourier transform","score":0.7932870984077454},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.7728326320648193},{"id":"https://openalex.org/keywords/convolution","display_name":"Convolution (computer science)","score":0.7223132252693176},{"id":"https://openalex.org/keywords/shared-memory","display_name":"Shared memory","score":0.5993200540542603},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.44253110885620117},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.36546510457992554},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.327577143907547},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.07917061448097229}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8483790159225464},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.8057626485824585},{"id":"https://openalex.org/C75172450","wikidata":"https://www.wikidata.org/wiki/Q623950","display_name":"Fast Fourier transform","level":2,"score":0.7932870984077454},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.7728326320648193},{"id":"https://openalex.org/C45347329","wikidata":"https://www.wikidata.org/wiki/Q5166604","display_name":"Convolution (computer science)","level":3,"score":0.7223132252693176},{"id":"https://openalex.org/C133875982","wikidata":"https://www.wikidata.org/wiki/Q764810","display_name":"Shared memory","level":2,"score":0.5993200540542603},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.44253110885620117},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.36546510457992554},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.327577143907547},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.07917061448097229},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.0}],"mesh":[],"locations_count":7,"locations":[{"id":"doi:10.1145/3394116","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3394116","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3394116","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},{"id":"pmh:oai:dro.dur.ac.uk.OAI2:32187","is_oa":false,"landing_page_url":"http://dro.dur.ac.uk/32187/","pdf_url":null,"source":{"id":"https://openalex.org/S4377196258","display_name":"Durham Research Online (Durham University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I190082696","host_organization_name":"Durham University","host_organization_lineage":["https://openalex.org/I190082696"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":"ACM transactions on architecture and code optimization, 2020, Vol.17(3), pp.18 [Peer Reviewed Journal]","raw_type":"Article"},{"id":"pmh:oai:arXiv.org:1910.01972","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1910.01972","pdf_url":"https://arxiv.org/pdf/1910.01972","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:2978376504","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/1910.01972","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"pmh:oai:durham-repository.worktribe.com:1250730","is_oa":true,"landing_page_url":"https://durham-repository.worktribe.com/output/1250730","pdf_url":null,"source":{"id":"https://openalex.org/S4306400188","display_name":"Durham Research Online (Durham University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I190082696","host_organization_name":"Durham University","host_organization_lineage":["https://openalex.org/I190082696"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"publishedVersion"},{"id":"pmh:oai:ora.ox.ac.uk:uuid:9106140b-a133-4d6b-8ec6-7233c3d39ea6","is_oa":true,"landing_page_url":"https://ora.ox.ac.uk/objects/uuid:9106140b-a133-4d6b-8ec6-7233c3d39ea6","pdf_url":null,"source":{"id":"https://openalex.org/S4306402636","display_name":"Oxford University Research Archive (ORA) (University of Oxford)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I40120149","host_organization_name":"University of Oxford","host_organization_lineage":["https://openalex.org/I40120149"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Symplectic Elements","raw_type":"Journal article"},{"id":"doi:10.48550/arxiv.1910.01972","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1910.01972","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article-journal"}],"best_oa_location":{"id":"doi:10.1145/3394116","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3394116","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3394116","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.4099999964237213,"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7"}],"awards":[{"id":"https://openalex.org/G1404762679","display_name":null,"funder_award_id":"ST/R000557/1","funder_id":"https://openalex.org/F4320337760","funder_display_name":"Central Laser Facility, Science and Technology Facilities Council"},{"id":"https://openalex.org/G2266506138","display_name":null,"funder_award_id":"EP/J010553/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G2784483126","display_name":null,"funder_award_id":"EP/K000136/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G6641252456","display_name":null,"funder_award_id":"EP/P020275/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G7092439882","display_name":null,"funder_award_id":"ST/R000557/1","funder_id":"https://openalex.org/F4320334632","funder_display_name":"Science and Technology Facilities Council"},{"id":"https://openalex.org/G8678996877","display_name":null,"funder_award_id":"EP/K000144/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G8762133647","display_name":"SKA Preconstruction 2017-18","funder_award_id":"ST/R000557/1","funder_id":"https://openalex.org/F4320334632","funder_display_name":"Science and Technology Facilities Council"}],"funders":[{"id":"https://openalex.org/F4320319993","display_name":"Leverhulme Trust","ror":"https://ror.org/012mzw131"},{"id":"https://openalex.org/F4320320290","display_name":"University of Oxford","ror":"https://ror.org/052gg0110"},{"id":"https://openalex.org/F4320334627","display_name":"Engineering and Physical Sciences Research Council","ror":"https://ror.org/0439y7842"},{"id":"https://openalex.org/F4320334632","display_name":"Science and Technology Facilities Council","ror":"https://ror.org/057g20z61"},{"id":"https://openalex.org/F4320337760","display_name":"Central Laser Facility, Science and Technology Facilities Council","ror":null}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2978376504.pdf","grobid_xml":"https://content.openalex.org/works/W2978376504.grobid-xml"},"referenced_works_count":21,"referenced_works":["https://openalex.org/W40470407","https://openalex.org/W123564486","https://openalex.org/W1789336918","https://openalex.org/W1863908278","https://openalex.org/W1970550611","https://openalex.org/W2016677154","https://openalex.org/W2047188941","https://openalex.org/W2061171222","https://openalex.org/W2072690750","https://openalex.org/W2094786337","https://openalex.org/W2108600626","https://openalex.org/W2123066648","https://openalex.org/W2134572726","https://openalex.org/W2172654076","https://openalex.org/W2236740621","https://openalex.org/W2771859531","https://openalex.org/W2796505725","https://openalex.org/W2945580137","https://openalex.org/W3013580005","https://openalex.org/W3101533771","https://openalex.org/W3147878143"],"related_works":["https://openalex.org/W3040913567","https://openalex.org/W2009516879","https://openalex.org/W2739151800","https://openalex.org/W2358825479","https://openalex.org/W2114927422","https://openalex.org/W2074570088","https://openalex.org/W1967802285","https://openalex.org/W2559874011","https://openalex.org/W2077712265","https://openalex.org/W2124224644","https://openalex.org/W2731702594","https://openalex.org/W2806059703","https://openalex.org/W2364870837","https://openalex.org/W2184317537","https://openalex.org/W2072912074","https://openalex.org/W2806428272","https://openalex.org/W3095498018","https://openalex.org/W2353852602","https://openalex.org/W2966756035","https://openalex.org/W2360456913"],"abstract_inverted_index":{"We":[0,26,47,64],"present":[1],"an":[2,52],"implementation":[3,50,53],"of":[4,13,54,86],"the":[5,11,33,55,59,83,87],"overlap-and-save":[6,56,88],"method,":[7],"a":[8,69],"method":[9,89],"for":[10,43,77],"convolution":[12],"very":[14],"long":[15],"signals":[16],"with":[17,51],"short":[18],"response":[19],"functions,":[20],"which":[21,37],"is":[22],"tailored":[23],"to":[24],"GPUs.":[25,91],"have":[27],"implemented":[28],"several":[29],"FFT":[30,61],"algorithms":[31],"(using":[32],"CUDA":[34],"programming":[35],"language),":[36],"exploit":[38],"GPU":[39,44],"shared":[40],"memory,":[41],"allowing":[42],"accelerated":[45],"convolution.":[46],"compare":[48],"our":[49],"algorithm":[57],"utilizing":[58],"NVIDIA":[60],"library":[62],"(cuFFT).":[63],"demonstrate":[65],"that":[66],"by":[67],"using":[68],"shared-memory-based":[70],"FFT,":[71],"we":[72],"can":[73],"achieved":[74],"significant":[75],"speed-ups":[76],"certain":[78],"problem":[79],"sizes":[80],"and":[81],"lower":[82],"memory":[84],"requirements":[85],"on":[90]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
