{"id":"https://openalex.org/W3021017153","doi":"https://doi.org/10.3233/apc200030","title":"High Performance Eigenvalue Solver for Hubbard Model: Tuning Strategies for LOBPCG Method on CUDA GPU","display_name":"High Performance Eigenvalue Solver for Hubbard Model: Tuning Strategies for LOBPCG Method on CUDA GPU","publication_year":2020,"publication_date":"2020-01-01","ids":{"openalex":"https://openalex.org/W3021017153","doi":"https://doi.org/10.3233/apc200030","mag":"3021017153"},"language":"en","primary_location":{"id":"doi:10.3233/apc200030","is_oa":true,"landing_page_url":"https://doi.org/10.3233/apc200030","pdf_url":"https://ebooks.iospress.nl/pdf/doi/10.3233/APC200030","source":{"id":"https://openalex.org/S4210175178","display_name":"Advances in parallel computing","issn_l":"0927-5452","issn":["0927-5452","1879-808X"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"book series"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Advances in Parallel Computing","raw_type":"book-chapter"},"type":"book-chapter","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ebooks.iospress.nl/pdf/doi/10.3233/APC200030","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5006749613","display_name":"Susumu Yamada","orcid":"https://orcid.org/0000-0003-0236-7124"},"institutions":[{"id":"https://openalex.org/I117197279","display_name":"Japan Atomic Energy Agency","ror":"https://ror.org/05nf86y53","country_code":"JP","type":"funder","lineage":["https://openalex.org/I117197279"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Yamada Susumu","raw_affiliation_strings":["Center for Computational Science & e-Systems, Japan Atomic Energy Agency"],"affiliations":[{"raw_affiliation_string":"Center for Computational Science & e-Systems, Japan Atomic Energy Agency","institution_ids":["https://openalex.org/I117197279"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010173270","display_name":"Masahiko Machida","orcid":null},"institutions":[{"id":"https://openalex.org/I117197279","display_name":"Japan Atomic Energy Agency","ror":"https://ror.org/05nf86y53","country_code":"JP","type":"funder","lineage":["https://openalex.org/I117197279"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Machida Masahiko","raw_affiliation_strings":["Center for Computational Science & e-Systems, Japan Atomic Energy Agency"],"affiliations":[{"raw_affiliation_string":"Center for Computational Science & e-Systems, Japan Atomic Energy Agency","institution_ids":["https://openalex.org/I117197279"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5086152822","display_name":"Toshiyuki Imamura","orcid":"https://orcid.org/0000-0003-1601-9710"},"institutions":[{"id":"https://openalex.org/I4210129730","display_name":"RIKEN Center for Computational Science","ror":"https://ror.org/03r519674","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210110652","https://openalex.org/I4210129730"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Imamura Toshiyuki","raw_affiliation_strings":["RIKEN Center for Computational Science"],"affiliations":[{"raw_affiliation_string":"RIKEN Center for Computational Science","institution_ids":["https://openalex.org/I4210129730"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5006749613"],"corresponding_institution_ids":["https://openalex.org/I117197279"],"apc_list":null,"apc_paid":null,"fwci":0.603,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.65660225,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10792","display_name":"Matrix Theory and Algorithms","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10792","display_name":"Matrix Theory and Algorithms","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9936000108718872,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9908999800682068,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.7358642816543579},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7031819224357605},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6987329125404358},{"id":"https://openalex.org/keywords/solver","display_name":"Solver","score":0.6712369918823242},{"id":"https://openalex.org/keywords/hamiltonian","display_name":"Hamiltonian (control theory)","score":0.5903661847114563},{"id":"https://openalex.org/keywords/eigenvalues-and-eigenvectors","display_name":"Eigenvalues and eigenvectors","score":0.5403661131858826},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.4724082350730896},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.46598926186561584},{"id":"https://openalex.org/keywords/matrix-multiplication","display_name":"Matrix multiplication","score":0.4199936091899872},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1571449637413025},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.14593738317489624},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.13422715663909912},{"id":"https://openalex.org/keywords/quantum","display_name":"Quantum","score":0.12187179923057556}],"concepts":[{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.7358642816543579},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7031819224357605},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6987329125404358},{"id":"https://openalex.org/C2778770139","wikidata":"https://www.wikidata.org/wiki/Q1966904","display_name":"Solver","level":2,"score":0.6712369918823242},{"id":"https://openalex.org/C130787639","wikidata":"https://www.wikidata.org/wiki/Q5645293","display_name":"Hamiltonian (control theory)","level":2,"score":0.5903661847114563},{"id":"https://openalex.org/C158693339","wikidata":"https://www.wikidata.org/wiki/Q190524","display_name":"Eigenvalues and eigenvectors","level":2,"score":0.5403661131858826},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.4724082350730896},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.46598926186561584},{"id":"https://openalex.org/C17349429","wikidata":"https://www.wikidata.org/wiki/Q1049914","display_name":"Matrix multiplication","level":3,"score":0.4199936091899872},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1571449637413025},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.14593738317489624},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.13422715663909912},{"id":"https://openalex.org/C84114770","wikidata":"https://www.wikidata.org/wiki/Q46344","display_name":"Quantum","level":2,"score":0.12187179923057556},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.3233/apc200030","is_oa":true,"landing_page_url":"https://doi.org/10.3233/apc200030","pdf_url":"https://ebooks.iospress.nl/pdf/doi/10.3233/APC200030","source":{"id":"https://openalex.org/S4210175178","display_name":"Advances in parallel computing","issn_l":"0927-5452","issn":["0927-5452","1879-808X"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"book series"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Advances in Parallel Computing","raw_type":"book-chapter"}],"best_oa_location":{"id":"doi:10.3233/apc200030","is_oa":true,"landing_page_url":"https://doi.org/10.3233/apc200030","pdf_url":"https://ebooks.iospress.nl/pdf/doi/10.3233/APC200030","source":{"id":"https://openalex.org/S4210175178","display_name":"Advances in parallel computing","issn_l":"0927-5452","issn":["0927-5452","1879-808X"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"book series"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Advances in Parallel Computing","raw_type":"book-chapter"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1069223013","display_name":null,"funder_award_id":"JSPS KAKENHI","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G3459562248","display_name":null,"funder_award_id":"Grant","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G4227499671","display_name":null,"funder_award_id":"KAKENHI Grant","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G4636223006","display_name":null,"funder_award_id":"JSPS KAK","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G5786340949","display_name":null,"funder_award_id":"KAKENHI Grant Number","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G5872082820","display_name":null,"funder_award_id":"18K11345","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G7752643416","display_name":null,"funder_award_id":"Japan","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G8430481527","display_name":null,"funder_award_id":"Number","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"}],"funders":[{"id":"https://openalex.org/F4320334764","display_name":"Japan Society for the Promotion of Science","ror":"https://ror.org/00hhkn466"},{"id":"https://openalex.org/F4320334980","display_name":"Japan Atomic Energy Agency","ror":"https://ror.org/05nf86y53"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3021017153.pdf","grobid_xml":"https://content.openalex.org/works/W3021017153.grobid-xml"},"referenced_works_count":5,"referenced_works":["https://openalex.org/W1586414899","https://openalex.org/W1980357388","https://openalex.org/W2012003658","https://openalex.org/W2350991510","https://openalex.org/W2575851384"],"related_works":["https://openalex.org/W3189307731","https://openalex.org/W2949962288","https://openalex.org/W2364686214","https://openalex.org/W1428699136","https://openalex.org/W1998560227","https://openalex.org/W1604758548","https://openalex.org/W2085873709","https://openalex.org/W4367553810","https://openalex.org/W2104142636","https://openalex.org/W2022772718"],"abstract_inverted_index":{"The":[0,13,152],"exact":[1],"diagonalization":[2],"is":[3,30,50,78,165],"the":[4,10,16,20,25,28,47,53,64,69,81,93,99,115,133,137,147,150,161,171],"most":[5,54,61],"accurate":[6],"approach":[7,14],"for":[8,57,136],"solving":[9],"Hubbard":[11],"model.":[12,26],"calculates":[15],"ground":[17],"state":[18],"of":[19,52,63,80,120,146,149],"Hamiltonian":[21,29],"derived":[22],"exactly":[23],"from":[24],"Since":[27,60],"a":[31],"large":[32],"sparse":[33],"symmetric":[34],"matrix,":[35],"we":[36,113,131],"usually":[37],"utilize":[38],"an":[39],"iteration":[40],"method.":[41],"It":[42],"has":[43],"been":[44],"reported":[45],"that":[46,160],"LOBPCG":[48,163],"method":[49,65,70],"one":[51,79,97],"effectual":[55],"solvers":[56],"this":[58,111],"problem.":[59],"operations":[62,123],"are":[66,95],"linear":[67],"operations,":[68],"can":[71,103],"be":[72,105],"executed":[73,96],"on":[74,155],"CUDA":[75],"GPU,":[76],"which":[77],"mainstream":[82],"processors,":[83],"by":[84,117],"using":[85],"cuBLAS":[86,174],"and":[87,175],"cuSPARSE":[88,176],"libraries":[89],"straightforwardly.":[90],"However,":[91],"since":[92],"routines":[94,116],"after":[98],"other,":[100],"cached":[101,128],"data":[102],"not":[104],"reused":[106],"among":[107],"other":[108],"routines.":[109,177],"In":[110],"research,":[112],"tune":[114],"fusing":[118],"some":[119],"their":[121],"loop":[122],"in":[124,144],"order":[125],"to":[126],"reuse":[127],"data.":[129],"Moreover,":[130],"propose":[132],"tuning":[134],"strategies":[135],"Hamiltonianvector":[138],"multiplication":[139],"with":[140,173],"shared":[141],"memory":[142],"system":[143],"consideration":[145],"character":[148],"Hamiltonian.":[151],"numerical":[153],"test":[154],"NVIDIA":[156],"Tesla":[157],"P100":[158],"shows":[159],"tuned":[162],"code":[164,172],"about":[166],"1.5":[167],"times":[168],"faster":[169],"than":[170]},"counts_by_year":[{"year":2022,"cited_by_count":1}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2020-05-13T00:00:00"}
