{"id":"https://openalex.org/W7120111704","doi":"https://doi.org/10.1007/s10586-025-05895-9","title":"Open CUDA convolution neural network inference implementation","display_name":"Open CUDA convolution neural network inference implementation","publication_year":2026,"publication_date":"2026-01-09","ids":{"openalex":"https://openalex.org/W7120111704","doi":"https://doi.org/10.1007/s10586-025-05895-9"},"language":"en","primary_location":{"id":"doi:10.1007/s10586-025-05895-9","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10586-025-05895-9","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10586-025-05895-9.pdf","source":{"id":"https://openalex.org/S106148199","display_name":"Cluster Computing","issn_l":"1386-7857","issn":["1386-7857","1573-7543"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Cluster Computing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/s10586-025-05895-9.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5011236085","display_name":"Paulo A. C. Lopes","orcid":"https://orcid.org/0000-0002-9045-0413"},"institutions":[{"id":"https://openalex.org/I121345201","display_name":"Instituto de Engenharia de Sistemas e Computadores Investiga\u00e7\u00e3o e Desenvolvimento","ror":"https://ror.org/04mqy3p58","country_code":"PT","type":"nonprofit","lineage":["https://openalex.org/I121345201","https://openalex.org/I4210125590"]}],"countries":["PT"],"is_corresponding":true,"raw_author_name":"Paulo Lopes","raw_affiliation_strings":["Instituto de Engenharia de Sistemas e Computadores: Investiga\u00e7\u00e3o e Desenvolvimento em Lisboa (INESC-ID), Instituto Superior T\u00e9cnico, Universidade de Lisboa, Rua Alves Redol, Lisboa, 1000-029, Portugal"],"affiliations":[{"raw_affiliation_string":"Instituto de Engenharia de Sistemas e Computadores: Investiga\u00e7\u00e3o e Desenvolvimento em Lisboa (INESC-ID), Instituto Superior T\u00e9cnico, Universidade de Lisboa, Rua Alves Redol, Lisboa, 1000-029, Portugal","institution_ids":["https://openalex.org/I121345201"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5011236085"],"corresponding_institution_ids":["https://openalex.org/I121345201"],"apc_list":{"value":2190,"currency":"EUR","value_usd":2790},"apc_paid":{"value":2190,"currency":"EUR","value_usd":2790},"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.05825427,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"29","issue":"2","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.8091999888420105,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.8091999888420105,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.010700000450015068,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.008999999612569809,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.7663000226020813},{"id":"https://openalex.org/keywords/convolution","display_name":"Convolution (computer science)","score":0.6014999747276306},{"id":"https://openalex.org/keywords/preprocessor","display_name":"Preprocessor","score":0.5893999934196472},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5320000052452087},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5076000094413757},{"id":"https://openalex.org/keywords/implementation","display_name":"Implementation","score":0.4474000036716461},{"id":"https://openalex.org/keywords/shared-memory","display_name":"Shared memory","score":0.43950000405311584}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9259999990463257},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.7663000226020813},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6502000093460083},{"id":"https://openalex.org/C45347329","wikidata":"https://www.wikidata.org/wiki/Q5166604","display_name":"Convolution (computer science)","level":3,"score":0.6014999747276306},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.5893999934196472},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5320000052452087},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5076000094413757},{"id":"https://openalex.org/C26713055","wikidata":"https://www.wikidata.org/wiki/Q245962","display_name":"Implementation","level":2,"score":0.4474000036716461},{"id":"https://openalex.org/C133875982","wikidata":"https://www.wikidata.org/wiki/Q764810","display_name":"Shared memory","level":2,"score":0.43950000405311584},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3846000134944916},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.38350000977516174},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.33149999380111694},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.31130000948905945},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3075000047683716},{"id":"https://openalex.org/C120373497","wikidata":"https://www.wikidata.org/wiki/Q1087987","display_name":"Parallel algorithm","level":2,"score":0.28369998931884766},{"id":"https://openalex.org/C91481028","wikidata":"https://www.wikidata.org/wiki/Q1054686","display_name":"Distributed memory","level":3,"score":0.27950000762939453},{"id":"https://openalex.org/C176649486","wikidata":"https://www.wikidata.org/wiki/Q2308807","display_name":"Memory management","level":3,"score":0.2743000090122223},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.26759999990463257},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.2619999945163727},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.26089999079704285},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.2603999972343445}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1007/s10586-025-05895-9","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10586-025-05895-9","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10586-025-05895-9.pdf","source":{"id":"https://openalex.org/S106148199","display_name":"Cluster Computing","issn_l":"1386-7857","issn":["1386-7857","1573-7543"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Cluster Computing","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1007/s10586-025-05895-9","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10586-025-05895-9","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10586-025-05895-9.pdf","source":{"id":"https://openalex.org/S106148199","display_name":"Cluster Computing","issn_l":"1386-7857","issn":["1386-7857","1573-7543"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Cluster Computing","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320323424","display_name":"Universidade de Lisboa","ror":"https://ror.org/01c27hj86"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7120111704.pdf","grobid_xml":"https://content.openalex.org/works/W7120111704.grobid-xml"},"referenced_works_count":21,"referenced_works":["https://openalex.org/W1978642402","https://openalex.org/W2097117768","https://openalex.org/W2139774022","https://openalex.org/W2162322364","https://openalex.org/W2163687928","https://openalex.org/W2194775991","https://openalex.org/W2622428623","https://openalex.org/W2736230459","https://openalex.org/W2801177523","https://openalex.org/W2963989532","https://openalex.org/W2998957070","https://openalex.org/W3080244561","https://openalex.org/W3098382995","https://openalex.org/W3141650078","https://openalex.org/W3204140704","https://openalex.org/W4206841102","https://openalex.org/W4294691471","https://openalex.org/W4302010773","https://openalex.org/W4308083841","https://openalex.org/W4308090436","https://openalex.org/W4401408852"],"related_works":[],"abstract_inverted_index":{"Abstract":[0],"This":[1],"work":[2],"presents":[3],"an":[4],"open,":[5],"efficient":[6],"(fast)":[7],"CUDA":[8],"convolution":[9,30],"neural":[10],"network":[11],"inference":[12],"implementation":[13],"specialized":[14],"in":[15],"some":[16],"layers":[17],"of":[18,33],"popular":[19],"nets":[20],"like":[21],"ResNet,":[22],"VGG,":[23],"and":[24,48,58,65],"GoogLeNet.":[25],"The":[26],"proposed":[27],"algorithm":[28],"implements":[29],"directly":[31],"instead":[32],"preprocessing":[34],"with":[35,70,79],"image":[36],"to":[37,43],"columns.":[38],"Algorithm":[39],"parameters":[40],"are":[41,68,82],"selected":[42],"meet":[44],"constraints":[45],"on":[46],"global":[47],"shared":[49,55],"memory":[50,56,66],"access":[51,67],"bandwidth,":[52],"register":[53],"usage,":[54,57],"instructions":[59],"per":[60,74],"clock.":[61],"Parallel":[62],"arithmetic":[63],"operations":[64],"achieved":[69],"several":[71],"parallel":[72],"blocks":[73],"streaming":[75],"processor.":[76],"Results":[77],"comparing":[78],"state-of-the-art":[80],"implementations":[81],"presented.":[83]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2026-01-10T00:00:00"}
