{"id":"https://openalex.org/W2093783472","doi":"https://doi.org/10.1145/1531743.1531756","title":"Mapping the LU decomposition on a many-core architecture","display_name":"Mapping the LU decomposition on a many-core architecture","publication_year":2009,"publication_date":"2009-05-18","ids":{"openalex":"https://openalex.org/W2093783472","doi":"https://doi.org/10.1145/1531743.1531756","mag":"2093783472"},"language":"en","primary_location":{"id":"doi:10.1145/1531743.1531756","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1531743.1531756","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 6th ACM conference on Computing frontiers","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5064764358","display_name":"Ioannis E. Venetis","orcid":"https://orcid.org/0000-0002-0022-0447"},"institutions":[{"id":"https://openalex.org/I174878644","display_name":"University of Patras","ror":"https://ror.org/017wvtq80","country_code":"GR","type":"education","lineage":["https://openalex.org/I174878644"]}],"countries":["GR"],"is_corresponding":true,"raw_author_name":"Ioannis E. Venetis","raw_affiliation_strings":["University of Patras, Patras, Greece"],"affiliations":[{"raw_affiliation_string":"University of Patras, Patras, Greece","institution_ids":["https://openalex.org/I174878644"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5046024163","display_name":"Guang R. Gao","orcid":"https://orcid.org/0000-0002-5265-7528"},"institutions":[{"id":"https://openalex.org/I86501945","display_name":"University of Delaware","ror":"https://ror.org/01sbq1a82","country_code":"US","type":"education","lineage":["https://openalex.org/I86501945"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Guang R. Gao","raw_affiliation_strings":["University of Delaware, Newark, DE, USA","University of Delaware, Newark, DE. USA"],"affiliations":[{"raw_affiliation_string":"University of Delaware, Newark, DE, USA","institution_ids":["https://openalex.org/I86501945"]},{"raw_affiliation_string":"University of Delaware, Newark, DE. USA","institution_ids":["https://openalex.org/I86501945"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5064764358"],"corresponding_institution_ids":["https://openalex.org/I174878644"],"apc_list":null,"apc_paid":null,"fwci":4.2211,"has_fulltext":false,"cited_by_count":24,"citation_normalized_percentile":{"value":0.94583128,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"71","last_page":"80"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8560634851455688},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7079600691795349},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.5007240772247314},{"id":"https://openalex.org/keywords/multi-core-processor","display_name":"Multi-core processor","score":0.48292776942253113},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.4686179757118225},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.45035961270332336},{"id":"https://openalex.org/keywords/single-precision-floating-point-format","display_name":"Single-precision floating-point format","score":0.43046507239341736},{"id":"https://openalex.org/keywords/register-file","display_name":"Register file","score":0.42572206258773804},{"id":"https://openalex.org/keywords/floating-point","display_name":"Floating point","score":0.41208508610725403},{"id":"https://openalex.org/keywords/instruction-set","display_name":"Instruction set","score":0.34547436237335205},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.30952876806259155}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8560634851455688},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7079600691795349},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.5007240772247314},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.48292776942253113},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.4686179757118225},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.45035961270332336},{"id":"https://openalex.org/C133095886","wikidata":"https://www.wikidata.org/wiki/Q1307173","display_name":"Single-precision floating-point format","level":3,"score":0.43046507239341736},{"id":"https://openalex.org/C117280010","wikidata":"https://www.wikidata.org/wiki/Q180944","display_name":"Register file","level":3,"score":0.42572206258773804},{"id":"https://openalex.org/C84211073","wikidata":"https://www.wikidata.org/wiki/Q117879","display_name":"Floating point","level":2,"score":0.41208508610725403},{"id":"https://openalex.org/C202491316","wikidata":"https://www.wikidata.org/wiki/Q272683","display_name":"Instruction set","level":2,"score":0.34547436237335205},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.30952876806259155},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/1531743.1531756","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1531743.1531756","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 6th ACM conference on Computing frontiers","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W1480928214","https://openalex.org/W1510263988","https://openalex.org/W1692608938","https://openalex.org/W1989817309","https://openalex.org/W2002257715","https://openalex.org/W2012309011","https://openalex.org/W2026237859","https://openalex.org/W2065771339","https://openalex.org/W2088943630","https://openalex.org/W2102582914","https://openalex.org/W2108315152","https://openalex.org/W2121082877","https://openalex.org/W2124480634","https://openalex.org/W2125357468","https://openalex.org/W2145021036","https://openalex.org/W2162830667","https://openalex.org/W4229666556","https://openalex.org/W4234643798","https://openalex.org/W4285719527","https://openalex.org/W6730824977"],"related_works":["https://openalex.org/W1564887326","https://openalex.org/W2116803521","https://openalex.org/W3215589575","https://openalex.org/W3150370983","https://openalex.org/W2239119680","https://openalex.org/W2773283032","https://openalex.org/W4239584669","https://openalex.org/W2045555750","https://openalex.org/W2797902698","https://openalex.org/W2771613338"],"abstract_inverted_index":{"Recently,":[0],"multi-core":[1],"architectures":[2],"with":[3,186],"alternative":[4],"memory":[5],"subsystem":[6],"designs":[7],"have":[8,141],"emerged.":[9],"Instead":[10],"of":[11,37,41,99,181],"using":[12],"hardware-managed":[13],"cache":[14],"hierarchies,":[15],"they":[16],"employ":[17],"software-managed":[18],"embedded":[19],"memory.":[20],"An":[21],"open":[22],"question":[23],"is":[24,93,134],"what":[25],"programming":[26],"and":[27,119,137,169,183,194],"compiling":[28],"methods":[29],"are":[30],"effective":[31],"to":[32,69,95,124],"exploit":[33],"the":[34,44,63,70,75,97,100,114],"performance":[35,180],"potential":[36,98],"this":[38,92],"new":[39],"class":[40],"architectures.":[42],"Using":[43],"LU":[45,72],"decomposition":[46],"as":[47],"a":[48,58,107,142,151,171,179,192,195],"case":[49],"study,":[50],"we":[51,105,149],"propose":[52],"three":[53],"techniques":[54],"that":[55,112,131,138],"combined":[56],"achieve":[57,178],"27":[59],"times":[60],"speedup":[61],"on":[62],"IBM":[64],"Cyclops-64":[65,166],"many-core":[66],"architecture,":[67],"compared":[68],"parallel":[71],"implementation":[73],"from":[74],"SPLASH-2":[76],"benchmarks":[77],"suite.":[78],"Our":[79],"first":[80],"method":[81,108,133,154],"allows":[82],"adaptive":[83],"load":[84],"distribution,":[85],"which":[86],"maximizes":[87,120],"load-balance":[88],"among":[89],"cores":[90],"-":[91],"important":[94],"leverage":[96],"next":[101],"two":[102],"methods.":[103],"Secondly,":[104],"developed":[106],"for":[109,155,191],"register":[110,125,152,156],"tiling":[111],"determines":[113],"optimal":[115],"data":[116,121],"tile":[117],"parameters":[118],"reuse":[122],"according":[123],"file":[126],"size":[127],"constraints.":[128],"We":[129,160,177],"demonstrate":[130],"our":[132],"inherently":[135],"general":[136],"it":[139],"should":[140],"much":[143],"broader":[144],"applicability":[145],"beyond":[146],"Cyclops-64.":[147],"Thirdly,":[148],"present":[150],"allocation":[153],"tiled":[157],"loop":[158],"bodies.":[159],"evaluate":[161],"its":[162],"effect":[163],"through":[164],"hand-tuned":[165],"assembly":[167],"code":[168],"observe":[170],"6-fold":[172],"reduction":[173],"in":[174],"load/store":[175],"operations.":[176],"19.17":[182],"27.50":[184],"GFlops":[185],"double-precision":[187],"floating":[188],"point":[189],"numbers,":[190],"700x700":[193],"1000x1000":[196],"matrix":[197],"respectively.":[198]},"counts_by_year":[{"year":2020,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2016,"cited_by_count":1},{"year":2014,"cited_by_count":2},{"year":2013,"cited_by_count":3},{"year":2012,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
