{"id":"https://openalex.org/W2323164726","doi":"https://doi.org/10.1109/tpds.2015.2450718","title":"Designing Efficient Index-Digit Algorithms for CUDA GPU Architectures","display_name":"Designing Efficient Index-Digit Algorithms for CUDA GPU Architectures","publication_year":2015,"publication_date":"2015-07-21","ids":{"openalex":"https://openalex.org/W2323164726","doi":"https://doi.org/10.1109/tpds.2015.2450718","mag":"2323164726"},"language":"en","primary_location":{"id":"doi:10.1109/tpds.2015.2450718","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2015.2450718","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5060934969","display_name":"Jacobo Lobeiras","orcid":null},"institutions":[{"id":"https://openalex.org/I11019714","display_name":"Universidade da Coru\u00f1a","ror":"https://ror.org/01qckj285","country_code":"ES","type":"education","lineage":["https://openalex.org/I11019714"]}],"countries":["ES"],"is_corresponding":true,"raw_author_name":"Jacobo Lobeiras","raw_affiliation_strings":["Computer Architecture Group (GAC), University of A Coru\u00f1a (UDC), Spain"],"affiliations":[{"raw_affiliation_string":"Computer Architecture Group (GAC), University of A Coru\u00f1a (UDC), Spain","institution_ids":["https://openalex.org/I11019714"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068836759","display_name":"Margarita Amor","orcid":"https://orcid.org/0000-0003-4641-7891"},"institutions":[{"id":"https://openalex.org/I11019714","display_name":"Universidade da Coru\u00f1a","ror":"https://ror.org/01qckj285","country_code":"ES","type":"education","lineage":["https://openalex.org/I11019714"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Margarita Amor","raw_affiliation_strings":["Computer Architecture Group (GAC), University of A Coru\u00f1a (UDC), Spain"],"affiliations":[{"raw_affiliation_string":"Computer Architecture Group (GAC), University of A Coru\u00f1a (UDC), Spain","institution_ids":["https://openalex.org/I11019714"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5070686724","display_name":"Ram\u00f3n Doallo","orcid":"https://orcid.org/0000-0002-6011-3387"},"institutions":[{"id":"https://openalex.org/I11019714","display_name":"Universidade da Coru\u00f1a","ror":"https://ror.org/01qckj285","country_code":"ES","type":"education","lineage":["https://openalex.org/I11019714"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Ramon Doallo","raw_affiliation_strings":["Computer Architecture Group (GAC), University of A Coru\u00f1a (UDC), Spain"],"affiliations":[{"raw_affiliation_string":"Computer Architecture Group (GAC), University of A Coru\u00f1a (UDC), Spain","institution_ids":["https://openalex.org/I11019714"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5060934969"],"corresponding_institution_ids":["https://openalex.org/I11019714"],"apc_list":null,"apc_paid":null,"fwci":2.3088,"has_fulltext":false,"cited_by_count":12,"citation_normalized_percentile":{"value":0.88807324,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":"27","issue":"5","first_page":"1331","last_page":"1343"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12326","display_name":"Network Packet Processing and Optimization","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8891750574111938},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.8210741281509399},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6813591718673706},{"id":"https://openalex.org/keywords/general-purpose-computing-on-graphics-processing-units","display_name":"General-purpose computing on graphics processing units","score":0.6254832148551941},{"id":"https://openalex.org/keywords/coprocessor","display_name":"Coprocessor","score":0.5877145528793335},{"id":"https://openalex.org/keywords/graphics","display_name":"Graphics","score":0.49121779203414917},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.456666499376297},{"id":"https://openalex.org/keywords/solver","display_name":"Solver","score":0.4280937910079956},{"id":"https://openalex.org/keywords/tridiagonal-matrix","display_name":"Tridiagonal matrix","score":0.4260624945163727},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.1421717405319214},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.09449604153633118}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8891750574111938},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.8210741281509399},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6813591718673706},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.6254832148551941},{"id":"https://openalex.org/C86111242","wikidata":"https://www.wikidata.org/wiki/Q859595","display_name":"Coprocessor","level":2,"score":0.5877145528793335},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.49121779203414917},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.456666499376297},{"id":"https://openalex.org/C2778770139","wikidata":"https://www.wikidata.org/wiki/Q1966904","display_name":"Solver","level":2,"score":0.4280937910079956},{"id":"https://openalex.org/C51647924","wikidata":"https://www.wikidata.org/wiki/Q1755277","display_name":"Tridiagonal matrix","level":3,"score":0.4260624945163727},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.1421717405319214},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.09449604153633118},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C158693339","wikidata":"https://www.wikidata.org/wiki/Q190524","display_name":"Eigenvalues and eigenvectors","level":2,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tpds.2015.2450718","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2015.2450718","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"},{"id":"pmh:oai:ruc.udc.es:2183/40697","is_oa":false,"landing_page_url":"http://hdl.handle.net/2183/40697","pdf_url":null,"source":{"id":"https://openalex.org/S4306402204","display_name":"RUC (Universidade Da Coru\u00f1a)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Decent work and economic growth","id":"https://metadata.un.org/sdg/8","score":0.4699999988079071}],"awards":[{"id":"https://openalex.org/G494760652","display_name":null,"funder_award_id":"TIN2013-42148-P","funder_id":"https://openalex.org/F4320321595","funder_display_name":"Federaci\u00f3n Espa\u00f1ola de Enfermedades Raras"},{"id":"https://openalex.org/G6238163624","display_name":null,"funder_award_id":"GRC2013/055","funder_id":"https://openalex.org/F4320321595","funder_display_name":"Federaci\u00f3n Espa\u00f1ola de Enfermedades Raras"}],"funders":[{"id":"https://openalex.org/F4320321595","display_name":"Federaci\u00f3n Espa\u00f1ola de Enfermedades Raras","ror":"https://ror.org/0348bpk17"},{"id":"https://openalex.org/F4320321837","display_name":"Ministerio de Econom\u00eda y Competitividad","ror":"https://ror.org/034900433"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W1762731526","https://openalex.org/W1978362304","https://openalex.org/W1978642402","https://openalex.org/W1984113966","https://openalex.org/W1997659541","https://openalex.org/W2008110907","https://openalex.org/W2045992017","https://openalex.org/W2048332769","https://openalex.org/W2094677173","https://openalex.org/W2099753835","https://openalex.org/W2102182691","https://openalex.org/W2102512593","https://openalex.org/W2107483876","https://openalex.org/W2110195531","https://openalex.org/W2112185810","https://openalex.org/W2113282196","https://openalex.org/W2119640849","https://openalex.org/W2128539477","https://openalex.org/W2129806488","https://openalex.org/W2130336316","https://openalex.org/W2130749431","https://openalex.org/W2136952590","https://openalex.org/W2141524575","https://openalex.org/W2158261096","https://openalex.org/W2160406723","https://openalex.org/W2161456019","https://openalex.org/W2167334577","https://openalex.org/W4239609053","https://openalex.org/W6649733760","https://openalex.org/W6683482463"],"related_works":["https://openalex.org/W2051273732","https://openalex.org/W1963859303","https://openalex.org/W2364044215","https://openalex.org/W3145240193","https://openalex.org/W2389600408","https://openalex.org/W240129890","https://openalex.org/W3048701459","https://openalex.org/W2149078538","https://openalex.org/W2080146221","https://openalex.org/W2147073383"],"abstract_inverted_index":{"Modern":[0],"graphics":[1],"processing":[2],"units":[3],"(GPUs)":[4],"offer":[5],"very":[6],"high":[7],"computing":[8],"power":[9],"at":[10],"relatively":[11],"low":[12],"cost.":[13],"Nevertheless,":[14],"designing":[15],"efficient":[16],"algorithms":[17,52],"for":[18,28,43,140],"the":[19,41,54,61,64,67,70,97,101,138],"GPUs":[20],"normally":[21],"requires":[22],"additional":[23],"time":[24],"and":[25,83,91,100,110,114,132],"effort,":[26],"even":[27],"experienced":[29],"programmers.":[30],"In":[31],"this":[32],"work":[33],"we":[34],"present":[35],"a":[36],"tuning":[37],"methodology":[38],"that":[39,50],"allows":[40],"design":[42],"CUDA-enabled":[44],"GPU":[45,80],"architectures":[46],"of":[47,63,69,122],"index-digit":[48],"algorithms,":[49,95],"is,":[51],"where":[53],"data":[55,71,142],"movement":[56],"can":[57],"be":[58],"described":[59],"as":[60,79],"permutations":[62],"digits":[65],"comprising":[66],"indices":[68],"elements.":[72],"This":[73],"methodology,":[74],"based":[75],"on":[76],"two-stages":[77],"identified":[78],"resource":[81],"analysis":[82],"operators":[84],"string":[85],"manipulation,":[86],"is":[87,108],"applied":[88],"to":[89,124],"FFT":[90],"tridiagonal":[92,143],"systems":[93],"solver":[94],"analyzing":[96],"performance":[98],"features":[99],"most":[102],"adequate":[103],"solutions.":[104],"The":[105],"resulting":[106],"implementation":[107],"compact":[109],"outperforms":[111],"other":[112],"well-known":[113],"commonly":[115],"used":[116],"state-of-the-art":[117],"libraries,":[118],"with":[119],"an":[120],"improvement":[121],"up":[123],"19.2":[125],"percent":[126,136],"over":[127,137],"NVIDIA's":[128],"complex":[129],"CUFFT":[130],",":[131],"more":[133],"than":[134],"3000":[135],"NVIDIA'sCUDPP":[139],"real":[141],"systems.":[144]},"counts_by_year":[{"year":2022,"cited_by_count":3},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":4},{"year":2017,"cited_by_count":2},{"year":2015,"cited_by_count":1}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2016-06-24T00:00:00"}
