{"id":"https://openalex.org/W2894778426","doi":"https://doi.org/10.1080/10556788.2018.1527331","title":"GPU parameter tuning for tall and skinny dense linear least squares problems","display_name":"GPU parameter tuning for tall and skinny dense linear least squares problems","publication_year":2018,"publication_date":"2018-10-01","ids":{"openalex":"https://openalex.org/W2894778426","doi":"https://doi.org/10.1080/10556788.2018.1527331","mag":"2894778426"},"language":"en","primary_location":{"id":"doi:10.1080/10556788.2018.1527331","is_oa":false,"landing_page_url":"https://doi.org/10.1080/10556788.2018.1527331","pdf_url":null,"source":{"id":"https://openalex.org/S103047102","display_name":"Optimization methods & software","issn_l":"1026-7670","issn":["1026-7670","1029-4937","1055-6788"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320547","host_organization_name":"Taylor & Francis","host_organization_lineage":["https://openalex.org/P4310320547"],"host_organization_lineage_names":["Taylor & Francis"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Optimization Methods and Software","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5063828235","display_name":"Benjamin Sauk","orcid":"https://orcid.org/0000-0002-1138-966X"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Benjamin Sauk","raw_affiliation_strings":["Department of Chemical Engineering, Carnegie Mellon University, Pittsburgh, PA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Chemical Engineering, Carnegie Mellon University, Pittsburgh, PA, USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010858186","display_name":"\u039d\u03b9\u03ba\u03cc\u03bb\u03b1\u03bf\u03c2 \u03a0\u03bb\u03cc\u03c3\u03ba\u03b1\u03c2","orcid":"https://orcid.org/0000-0001-5876-9945"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nikolaos Ploskas","raw_affiliation_strings":["Department of Chemical Engineering, Carnegie Mellon University, Pittsburgh, PA, USA"],"raw_orcid":"https://orcid.org/0000-0001-5876-9945","affiliations":[{"raw_affiliation_string":"Department of Chemical Engineering, Carnegie Mellon University, Pittsburgh, PA, USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5031811254","display_name":"Nikolaos V. Sahinidis","orcid":"https://orcid.org/0000-0003-2087-9131"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Nikolaos Sahinidis","raw_affiliation_strings":["Department of Chemical Engineering, Carnegie Mellon University, Pittsburgh, PA, USA"],"raw_orcid":"https://orcid.org/0000-0003-2087-9131","affiliations":[{"raw_affiliation_string":"Department of Chemical Engineering, Carnegie Mellon University, Pittsburgh, PA, USA","institution_ids":["https://openalex.org/I74973139"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5031811254"],"corresponding_institution_ids":["https://openalex.org/I74973139"],"apc_list":null,"apc_paid":null,"fwci":0.6412,"has_fulltext":false,"cited_by_count":13,"citation_normalized_percentile":{"value":0.7247109,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":"35","issue":"3","first_page":"638","last_page":"660"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10792","display_name":"Matrix Theory and Algorithms","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10792","display_name":"Matrix Theory and Algorithms","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10963","display_name":"Advanced Optimization Algorithms Research","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/2612","display_name":"Numerical Analysis"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12303","display_name":"Tensor decomposition and applications","score":0.9905999898910522,"subfield":{"id":"https://openalex.org/subfields/2605","display_name":"Computational Mathematics"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7729376554489136},{"id":"https://openalex.org/keywords/solver","display_name":"Solver","score":0.7675384283065796},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7005037665367126},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.5992948412895203},{"id":"https://openalex.org/keywords/graphics","display_name":"Graphics","score":0.5606722235679626},{"id":"https://openalex.org/keywords/general-purpose-computing-on-graphics-processing-units","display_name":"General-purpose computing on graphics processing units","score":0.5529088973999023},{"id":"https://openalex.org/keywords/magma","display_name":"Magma","score":0.49836206436157227},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.46666425466537476},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.44030076265335083},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.4278682470321655},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.4147893488407135},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.3857363760471344},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.14573925733566284},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.13293510675430298},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.0746757984161377}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7729376554489136},{"id":"https://openalex.org/C2778770139","wikidata":"https://www.wikidata.org/wiki/Q1966904","display_name":"Solver","level":2,"score":0.7675384283065796},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7005037665367126},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.5992948412895203},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.5606722235679626},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.5529088973999023},{"id":"https://openalex.org/C183222429","wikidata":"https://www.wikidata.org/wiki/Q42278","display_name":"Magma","level":3,"score":0.49836206436157227},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.46666425466537476},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.44030076265335083},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4278682470321655},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.4147893488407135},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.3857363760471344},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.14573925733566284},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.13293510675430298},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0746757984161377},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C120806208","wikidata":"https://www.wikidata.org/wiki/Q8072","display_name":"Volcano","level":2,"score":0.0},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.0},{"id":"https://openalex.org/C165205528","wikidata":"https://www.wikidata.org/wiki/Q83371","display_name":"Seismology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1080/10556788.2018.1527331","is_oa":false,"landing_page_url":"https://doi.org/10.1080/10556788.2018.1527331","pdf_url":null,"source":{"id":"https://openalex.org/S103047102","display_name":"Optimization methods & software","issn_l":"1026-7670","issn":["1026-7670","1029-4937","1055-6788"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320547","host_organization_name":"Taylor & Francis","host_organization_lineage":["https://openalex.org/P4310320547"],"host_organization_lineage_names":["Taylor & Francis"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Optimization Methods and Software","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306084","display_name":"U.S. Department of Energy","ror":"https://ror.org/01bj3aw27"},{"id":"https://openalex.org/F4320309480","display_name":"Nvidia","ror":"https://ror.org/03jdj4y14"},{"id":"https://openalex.org/F4320332353","display_name":"Office of Fossil Energy","ror":"https://ror.org/03ery9d53"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":64,"referenced_works":["https://openalex.org/W57332520","https://openalex.org/W128066320","https://openalex.org/W197187682","https://openalex.org/W1497256448","https://openalex.org/W1760551737","https://openalex.org/W1863336885","https://openalex.org/W1967989271","https://openalex.org/W1979206939","https://openalex.org/W1982478920","https://openalex.org/W1983157164","https://openalex.org/W1985412770","https://openalex.org/W1989288162","https://openalex.org/W1998855065","https://openalex.org/W2001833310","https://openalex.org/W2009713937","https://openalex.org/W2024991751","https://openalex.org/W2026299065","https://openalex.org/W2031715986","https://openalex.org/W2039778940","https://openalex.org/W2041461390","https://openalex.org/W2048253612","https://openalex.org/W2054875385","https://openalex.org/W2056760934","https://openalex.org/W2058073282","https://openalex.org/W2063186542","https://openalex.org/W2064558818","https://openalex.org/W2068480996","https://openalex.org/W2070909027","https://openalex.org/W2078950386","https://openalex.org/W2092070658","https://openalex.org/W2097360283","https://openalex.org/W2100218206","https://openalex.org/W2108157916","https://openalex.org/W2109364787","https://openalex.org/W2111221242","https://openalex.org/W2113741278","https://openalex.org/W2125299871","https://openalex.org/W2130788193","https://openalex.org/W2139116943","https://openalex.org/W2146973119","https://openalex.org/W2147922634","https://openalex.org/W2149381887","https://openalex.org/W2152513418","https://openalex.org/W2152710595","https://openalex.org/W2154641788","https://openalex.org/W2155382668","https://openalex.org/W2157237396","https://openalex.org/W2160960847","https://openalex.org/W2162322364","https://openalex.org/W2163913714","https://openalex.org/W2171074980","https://openalex.org/W2260489273","https://openalex.org/W2314321304","https://openalex.org/W2336687883","https://openalex.org/W2340091566","https://openalex.org/W2732460962","https://openalex.org/W2763920787","https://openalex.org/W2798909945","https://openalex.org/W3024941085","https://openalex.org/W3103595226","https://openalex.org/W3123791793","https://openalex.org/W4229666556","https://openalex.org/W4294541781","https://openalex.org/W4301491118"],"related_works":["https://openalex.org/W2983282793","https://openalex.org/W1963859303","https://openalex.org/W2364044215","https://openalex.org/W2389600408","https://openalex.org/W240129890","https://openalex.org/W3048701459","https://openalex.org/W2149078538","https://openalex.org/W2080146221","https://openalex.org/W2546223573","https://openalex.org/W2370314112"],"abstract_inverted_index":{"Linear":[0],"least":[1],"squares":[2],"problems":[3],"(LLSPs)":[4],"routinely":[5],"arise":[6],"in":[7,24],"many":[8],"scientific":[9],"and":[10,41,88,111,145,181],"engineering":[11],"problems.":[12],"One":[13],"of":[14,69,83,128,162],"the":[15,67,81,126,129,172],"fastest":[16],"ways":[17],"to":[18,72,92,138,154],"solve":[19],"LLSPs":[20],"involves":[21],"performing":[22],"calculations":[23],"parallel":[25],"on":[26,47],"graphics":[27],"processing":[28],"units":[29],"(GPUs).":[30],"However,":[31],"GPU":[32,39,56,70,74,99],"algorithms":[33,53,71],"are":[34],"typically":[35],"designed":[36],"for":[37,54,60,97,117,142],"one":[38],"architecture":[40],"may":[42],"be":[43,114],"suboptimal":[44],"or":[45,100],"unusable":[46],"another":[48],"GPU.":[49],"To":[50],"design":[51],"optimal":[52,119,156],"any":[55],"with":[57,168],"little":[58],"need":[59],"modifying":[61],"code,":[62],"tuneable":[63,95],"parameters":[64,96,121,141,157],"can":[65,113,123],"simplify":[66],"transition":[68],"different":[73],"architectures.":[75],"In":[76],"this":[77],"paper,":[78],"we":[79,151],"investigate":[80],"benefits":[82],"using":[84],"derivative-free":[85],"optimization":[86,90],"(DFO)":[87],"simulation":[89],"(SO)":[91],"systematically":[93],"optimize":[94],"a":[98,178],"hybrid":[101],"CPU/GPU":[102],"LLSP":[103,131],"solvers.":[104],"Computational":[105],"experiments":[106],"show":[107],"that":[108,122],"both":[109],"DFO":[110,149],"SO":[112],"effective":[115],"tools":[116],"determining":[118],"tuning":[120,184],"speed":[124],"up":[125],"performance":[127],"popular":[130],"solver":[132],"MAGMA":[133],"by":[134],"about":[135],"1.8x,":[136],"compared":[137],"MAGMA's":[139],"default":[140],"large":[143],"tall":[144],"skinny":[146],"matrices.":[147],"Using":[148],"solvers,":[150],"were":[152],"able":[153],"identify":[155],"after":[158],"enumerating":[159],"an":[160],"order":[161],"magnitude":[163],"fewer":[164],"parameter":[165],"combinations":[166],"than":[167,177],"direct":[169],"enumeration.":[170],"Additionally,":[171],"proposed":[173],"approach":[174],"is":[175],"faster":[176],"state-of-the-art":[179],"autotuner":[180],"provides":[182],"better":[183],"parameters.":[185]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":5},{"year":2021,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
