{"id":"https://openalex.org/W2562420267","doi":"https://doi.org/10.1142/s0129626416400016","title":"A Novel Multi-GPU Parallel Optimization Model for The Sparse Matrix-Vector Multiplication","display_name":"A Novel Multi-GPU Parallel Optimization Model for The Sparse Matrix-Vector Multiplication","publication_year":2016,"publication_date":"2016-12-01","ids":{"openalex":"https://openalex.org/W2562420267","doi":"https://doi.org/10.1142/s0129626416400016","mag":"2562420267"},"language":"en","primary_location":{"id":"doi:10.1142/s0129626416400016","is_oa":false,"landing_page_url":"https://doi.org/10.1142/s0129626416400016","pdf_url":null,"source":{"id":"https://openalex.org/S18360026","display_name":"Parallel Processing Letters","issn_l":"0129-6264","issn":["0129-6264","1793-642X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319815","host_organization_name":"World Scientific","host_organization_lineage":["https://openalex.org/P4310319815"],"host_organization_lineage_names":["World Scientific"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Parallel Processing Letters","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5047780118","display_name":"Jiaquan Gao","orcid":"https://orcid.org/0000-0002-2983-9921"},"institutions":[{"id":"https://openalex.org/I152031979","display_name":"Nanjing Normal University","ror":"https://ror.org/036trcv74","country_code":"CN","type":"education","lineage":["https://openalex.org/I152031979"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jiaquan Gao","raw_affiliation_strings":["School of Computer Science and Technology, Nanjing Normal University, Nanjing 210097, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Nanjing Normal University, Nanjing 210097, China","institution_ids":["https://openalex.org/I152031979"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101838846","display_name":"Yuanshen Zhou","orcid":"https://orcid.org/0000-0001-5757-5794"},"institutions":[{"id":"https://openalex.org/I55712492","display_name":"Zhejiang University of Technology","ror":"https://ror.org/02djqfd08","country_code":"CN","type":"education","lineage":["https://openalex.org/I55712492"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuanshen Zhou","raw_affiliation_strings":["College of Computer Science and Technology, Zhejiang University of Technology, Hangzhou, 310023, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, Zhejiang University of Technology, Hangzhou, 310023, China","institution_ids":["https://openalex.org/I55712492"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5082226717","display_name":"Kesong Wu","orcid":null},"institutions":[{"id":"https://openalex.org/I55712492","display_name":"Zhejiang University of Technology","ror":"https://ror.org/02djqfd08","country_code":"CN","type":"education","lineage":["https://openalex.org/I55712492"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kesong Wu","raw_affiliation_strings":["College of Computer Science and Technology, Zhejiang University of Technology, Hangzhou, 310023, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, Zhejiang University of Technology, Hangzhou, 310023, China","institution_ids":["https://openalex.org/I55712492"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5047780118"],"corresponding_institution_ids":["https://openalex.org/I152031979"],"apc_list":null,"apc_paid":null,"fwci":0.3153,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.60146699,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"26","issue":"04","first_page":"1640001","last_page":"1640001"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10792","display_name":"Matrix Theory and Algorithms","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8768789172172546},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7826499938964844},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.6605560779571533},{"id":"https://openalex.org/keywords/multiplication","display_name":"Multiplication (music)","score":0.6588585376739502},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.5760301351547241},{"id":"https://openalex.org/keywords/matrix-multiplication","display_name":"Matrix multiplication","score":0.5428968667984009},{"id":"https://openalex.org/keywords/graphics","display_name":"Graphics","score":0.523338258266449},{"id":"https://openalex.org/keywords/sparse-matrix","display_name":"Sparse matrix","score":0.46997278928756714},{"id":"https://openalex.org/keywords/matrix","display_name":"Matrix (chemical analysis)","score":0.4590867757797241},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.4559910297393799},{"id":"https://openalex.org/keywords/general-purpose-computing-on-graphics-processing-units","display_name":"General-purpose computing on graphics processing units","score":0.4505409300327301},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3353918790817261},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.08041414618492126},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.07836517691612244}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8768789172172546},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7826499938964844},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.6605560779571533},{"id":"https://openalex.org/C2780595030","wikidata":"https://www.wikidata.org/wiki/Q3860309","display_name":"Multiplication (music)","level":2,"score":0.6588585376739502},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.5760301351547241},{"id":"https://openalex.org/C17349429","wikidata":"https://www.wikidata.org/wiki/Q1049914","display_name":"Matrix multiplication","level":3,"score":0.5428968667984009},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.523338258266449},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.46997278928756714},{"id":"https://openalex.org/C106487976","wikidata":"https://www.wikidata.org/wiki/Q685816","display_name":"Matrix (chemical analysis)","level":2,"score":0.4590867757797241},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.4559910297393799},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.4505409300327301},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3353918790817261},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.08041414618492126},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.07836517691612244},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.0},{"id":"https://openalex.org/C84114770","wikidata":"https://www.wikidata.org/wiki/Q46344","display_name":"Quantum","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1142/s0129626416400016","is_oa":false,"landing_page_url":"https://doi.org/10.1142/s0129626416400016","pdf_url":null,"source":{"id":"https://openalex.org/S18360026","display_name":"Parallel Processing Letters","issn_l":"0129-6264","issn":["0129-6264","1793-642X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319815","host_organization_name":"World Scientific","host_organization_lineage":["https://openalex.org/P4310319815"],"host_organization_lineage_names":["World Scientific"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Parallel Processing Letters","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320338464","display_name":"Natural Science Foundation of Zhejiang Province","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":9,"referenced_works":["https://openalex.org/W1969255113","https://openalex.org/W1981557297","https://openalex.org/W1995649289","https://openalex.org/W2031460602","https://openalex.org/W2082238959","https://openalex.org/W2089908605","https://openalex.org/W2101511474","https://openalex.org/W2102528350","https://openalex.org/W2274552607"],"related_works":["https://openalex.org/W1963859303","https://openalex.org/W2364044215","https://openalex.org/W2389600408","https://openalex.org/W240129890","https://openalex.org/W3048701459","https://openalex.org/W2149078538","https://openalex.org/W2080146221","https://openalex.org/W2546223573","https://openalex.org/W2370314112","https://openalex.org/W1912958759"],"abstract_inverted_index":{"Accelerating":[0],"the":[1,7,25,77,102,107,116,128,144,159,164,182],"sparse":[2],"matrix-vector":[3],"multiplication":[4],"(SpMV)":[5],"on":[6,20,106,163],"graphics":[8],"processing":[9],"units":[10],"(GPUs)":[11],"has":[12],"attracted":[13],"considerable":[14],"attention":[15],"recently.":[16],"We":[17],"observe":[18],"that":[19,137,150],"a":[21,34,43,52,64,80,95],"specific":[22],"multiple-GPU":[23],"platform,":[24],"SpMV":[26,68,120,135,154],"performance":[27,96],"can":[28],"usually":[29],"be":[30],"greatly":[31],"improved":[32],"when":[33],"matrix":[35,89],"is":[36,49,83,99,111,138,155,170],"partitioned":[37],"into":[38],"several":[39],"blocks":[40],"according":[41],"to":[42,51,62,85,113],"predetermined":[44],"rule":[45,82],"and":[46,93,104,140,161],"each":[47,175],"block":[48],"assigned":[50],"GPU":[53],"with":[54],"an":[55,131],"appropriate":[56],"storage":[57],"format.":[58],"This":[59],"motivates":[60],"us":[61],"propose":[63],"novel":[65],"multi-GPU":[66,133],"parallel":[67,134],"optimization":[69],"model.":[70,188],"Our":[71],"model":[72,152,169],"involves":[73],"two":[74],"stages.":[75],"In":[76],"first":[78],"stage,":[79],"simple":[81],"defined":[84],"divide":[86],"any":[87,147],"given":[88],"among":[90],"multiple":[91],"GPUs,":[92],"then":[94],"model,":[97],"which":[98],"independent":[100,157],"of":[101,109,119,158,166,177,185],"problems":[103],"dependent":[105,162],"resources":[108,165],"devices,":[110,167],"proposed":[112,187],"accurately":[114],"predict":[115],"execution":[117],"time":[118],"kernels.":[121],"Using":[122],"these":[123],"models,":[124],"we":[125],"construct":[126],"in":[127],"second":[129],"stage":[130],"optimally":[132],"algorithm":[136],"automatically":[139],"rapidly":[141],"generated":[142],"for":[143,146,153,174],"platform":[145],"problem.":[148],"Given":[149],"our":[151,186],"general,":[156],"problems,":[160],"this":[168],"constructed":[171],"only":[172],"once":[173],"type":[176],"GPU.":[178],"The":[179],"experiments":[180],"validate":[181],"high":[183],"efficiency":[184]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2016,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
