{"id":"https://openalex.org/W2509368344","doi":"https://doi.org/10.1109/hpec.2016.7761591","title":"LU, QR, and Cholesky factorizations: Programming model, performance analysis and optimization techniques for the Intel Knights Landing Xeon Phi","display_name":"LU, QR, and Cholesky factorizations: Programming model, performance analysis and optimization techniques for the Intel Knights Landing Xeon Phi","publication_year":2016,"publication_date":"2016-09-01","ids":{"openalex":"https://openalex.org/W2509368344","doi":"https://doi.org/10.1109/hpec.2016.7761591","mag":"2509368344"},"language":"en","primary_location":{"id":"doi:10.1109/hpec.2016.7761591","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec.2016.7761591","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101964224","display_name":"Azzam Haidar","orcid":"https://orcid.org/0000-0002-3177-2084"},"institutions":[{"id":"https://openalex.org/I75027704","display_name":"University of Tennessee at Knoxville","ror":"https://ror.org/020f3ap87","country_code":"US","type":"education","lineage":["https://openalex.org/I75027704"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Azzam Haidar","raw_affiliation_strings":["University of Tennessee, Knoxville, TN"],"affiliations":[{"raw_affiliation_string":"University of Tennessee, Knoxville, TN","institution_ids":["https://openalex.org/I75027704"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083604741","display_name":"Stanimire Tomov","orcid":"https://orcid.org/0000-0002-5937-7959"},"institutions":[{"id":"https://openalex.org/I75027704","display_name":"University of Tennessee at Knoxville","ror":"https://ror.org/020f3ap87","country_code":"US","type":"education","lineage":["https://openalex.org/I75027704"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Stanimire Tomov","raw_affiliation_strings":["University of Tennessee, Knoxville, TN"],"affiliations":[{"raw_affiliation_string":"University of Tennessee, Knoxville, TN","institution_ids":["https://openalex.org/I75027704"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090458436","display_name":"Konstantin Arturov","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Konstantin Arturov","raw_affiliation_strings":["Intel Corporation, Novosibirsk, Russia"],"affiliations":[{"raw_affiliation_string":"Intel Corporation, Novosibirsk, Russia","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069028101","display_name":"Murat Efe Guney","orcid":null},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Murat Guney","raw_affiliation_strings":["Intel Corporation, Hillsboro, OR"],"affiliations":[{"raw_affiliation_string":"Intel Corporation, Hillsboro, OR","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024848824","display_name":"Shane Story","orcid":null},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shane Story","raw_affiliation_strings":["Intel Corporation, Hillsboro, OR"],"affiliations":[{"raw_affiliation_string":"Intel Corporation, Hillsboro, OR","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5075517045","display_name":"Jack Dongarra","orcid":"https://orcid.org/0000-0003-3247-1782"},"institutions":[{"id":"https://openalex.org/I75027704","display_name":"University of Tennessee at Knoxville","ror":"https://ror.org/020f3ap87","country_code":"US","type":"education","lineage":["https://openalex.org/I75027704"]},{"id":"https://openalex.org/I28407311","display_name":"University of Manchester","ror":"https://ror.org/027m9bs27","country_code":"GB","type":"education","lineage":["https://openalex.org/I28407311"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Jack Dongarra","raw_affiliation_strings":["University of Tennessee, University of Manchester, UK"],"affiliations":[{"raw_affiliation_string":"University of Tennessee, University of Manchester, UK","institution_ids":["https://openalex.org/I28407311","https://openalex.org/I75027704"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5101964224"],"corresponding_institution_ids":["https://openalex.org/I75027704"],"apc_list":null,"apc_paid":null,"fwci":4.1735,"has_fulltext":false,"cited_by_count":13,"citation_normalized_percentile":{"value":0.94201972,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8729302883148193},{"id":"https://openalex.org/keywords/xeon-phi","display_name":"Xeon Phi","score":0.755859375},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6812357306480408},{"id":"https://openalex.org/keywords/programming-paradigm","display_name":"Programming paradigm","score":0.51042240858078},{"id":"https://openalex.org/keywords/task-parallelism","display_name":"Task parallelism","score":0.4985976219177246},{"id":"https://openalex.org/keywords/data-parallelism","display_name":"Data parallelism","score":0.48974013328552246},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.4664776921272278},{"id":"https://openalex.org/keywords/dataflow","display_name":"Dataflow","score":0.4601150155067444},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.44504547119140625},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.39916670322418213},{"id":"https://openalex.org/keywords/parallelism","display_name":"Parallelism (grammar)","score":0.26180320978164673},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.2604524493217468},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.159837543964386}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8729302883148193},{"id":"https://openalex.org/C96972482","wikidata":"https://www.wikidata.org/wiki/Q1049168","display_name":"Xeon Phi","level":2,"score":0.755859375},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6812357306480408},{"id":"https://openalex.org/C34165917","wikidata":"https://www.wikidata.org/wiki/Q188267","display_name":"Programming paradigm","level":2,"score":0.51042240858078},{"id":"https://openalex.org/C42992933","wikidata":"https://www.wikidata.org/wiki/Q691169","display_name":"Task parallelism","level":3,"score":0.4985976219177246},{"id":"https://openalex.org/C61483411","wikidata":"https://www.wikidata.org/wiki/Q3124522","display_name":"Data parallelism","level":3,"score":0.48974013328552246},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.4664776921272278},{"id":"https://openalex.org/C96324660","wikidata":"https://www.wikidata.org/wiki/Q205446","display_name":"Dataflow","level":2,"score":0.4601150155067444},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.44504547119140625},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.39916670322418213},{"id":"https://openalex.org/C2781172179","wikidata":"https://www.wikidata.org/wiki/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.26180320978164673},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.2604524493217468},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.159837543964386},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpec.2016.7761591","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec.2016.7761591","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W36826159","https://openalex.org/W1028753842","https://openalex.org/W1968460630","https://openalex.org/W1988389332","https://openalex.org/W1988888548","https://openalex.org/W2011903918","https://openalex.org/W2016279572","https://openalex.org/W2016358047","https://openalex.org/W2023096360","https://openalex.org/W2049926709","https://openalex.org/W2085118703","https://openalex.org/W2104861744","https://openalex.org/W2113277707","https://openalex.org/W2121893797","https://openalex.org/W2162322364","https://openalex.org/W2165904264","https://openalex.org/W2170597842","https://openalex.org/W2179933257","https://openalex.org/W2284845123","https://openalex.org/W2340091566","https://openalex.org/W2514474307","https://openalex.org/W2946724226","https://openalex.org/W3145506805","https://openalex.org/W3187688612","https://openalex.org/W4205190765"],"related_works":["https://openalex.org/W4240606930","https://openalex.org/W2494130044","https://openalex.org/W1972271823","https://openalex.org/W2003935582","https://openalex.org/W4399872414","https://openalex.org/W168408236","https://openalex.org/W2950520577","https://openalex.org/W74409296","https://openalex.org/W2040503315","https://openalex.org/W191463404"],"abstract_inverted_index":{"A":[0],"wide":[1],"variety":[2],"of":[3,51,65,161,173,196],"heterogeneous":[4,151,206],"compute":[5],"resources,":[6],"ranging":[7,125],"from":[8,126],"multicore":[9],"CPUs":[10],"to":[11,17,23,41,73,88,105,111,130,168,185],"GPUs":[12],"and":[13,30,58,67,81,97,110,115,135,158,171,189],"coprocessors,":[14],"are":[15],"available":[16],"modern":[18],"computers,":[19],"making":[20],"it":[21],"challenging":[22],"design":[24,57],"unified":[25,145],"numerical":[26,176],"libraries":[27,204],"that":[28,85,178],"efficiently":[29,42],"productively":[31],"use":[32,43],"all":[33,149],"these":[34],"varied":[35],"resources.":[36,152],"For":[37],"example,":[38],"in":[39,62,71,166,183],"order":[40,72,184],"Intel's":[44],"Knights":[45],"Landing":[46],"(KNL)":[47],"processor,":[48],"the":[49,107,113,116,150,156,159,193],"next-generation":[50],"Xeon":[52],"Phi":[53],"architectures,":[54],"one":[55],"must":[56],"schedule":[59],"an":[60],"application":[61],"multiple":[63,123],"degrees":[64],"parallelism":[66,96],"task":[68,132,137,142],"grain":[69,133],"sizes":[70],"obtain":[74],"efficient":[75],"performance.":[76],"We":[77],"propose":[78],"a":[79,90,101],"productive":[80],"portable":[82],"programming":[83,174,198],"model":[84],"allows":[86],"us":[87],"write":[89],"serial-looking":[91],"code,":[92],"which,":[93],"however,":[94],"achieves":[95],"scalability":[98],"by":[99],"using":[100],"lightweight":[102],"runtime":[103],"environment":[104],"manage":[106],"resource-specific":[108],"workload,":[109],"control":[112],"dataflow":[114],"parallel":[117,197],"execution.":[118],"This":[119],"is":[120],"done":[121],"through":[122],"techniques":[124],"multi-level":[127],"data":[128],"partitioning":[129],"adaptive":[131],"sizes,":[134],"dynamic":[136],"scheduling.":[138],"In":[139],"addition,":[140],"our":[141],"abstractions":[143],"enable":[144],"algorithmic":[146],"development":[147],"across":[148],"Finally,":[153],"we":[154],"outline":[155],"strengths":[157],"effectiveness":[160],"this":[162],"approach":[163],"-":[164,182],"especially":[165],"regards":[167],"hardware":[169],"trends":[170],"ease":[172],"high-performance":[175,201],"software":[177],"current":[179,187],"applications":[180],"need":[181],"motivate":[186],"work":[188],"future":[190],"directions":[191],"for":[192,200],"next":[194],"generation":[195],"models":[199],"linear":[202],"algebra":[203],"on":[205],"systems.":[207]},"counts_by_year":[{"year":2019,"cited_by_count":3},{"year":2018,"cited_by_count":5},{"year":2017,"cited_by_count":5}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
