{"id":"https://openalex.org/W2063532530","doi":"https://doi.org/10.1109/asap.2013.6567589","title":"Linear algebra computations in heterogeneous systems","display_name":"Linear algebra computations in heterogeneous systems","publication_year":2013,"publication_date":"2013-06-01","ids":{"openalex":"https://openalex.org/W2063532530","doi":"https://doi.org/10.1109/asap.2013.6567589","mag":"2063532530"},"language":"en","primary_location":{"id":"doi:10.1109/asap.2013.6567589","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asap.2013.6567589","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2013 IEEE 24th International Conference on Application-Specific Systems, Architectures and Processors","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5068953697","display_name":"Sam Skalicky","orcid":null},"institutions":[{"id":"https://openalex.org/I155173764","display_name":"Rochester Institute of Technology","ror":"https://ror.org/00v4yb702","country_code":"US","type":"education","lineage":["https://openalex.org/I155173764"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Sam Skalicky","raw_affiliation_strings":["Department of Computer Engineering, Rochester Institute of Technology, Rochester, NY, USA","[Dept. of Comput. Eng., Rochester Inst. of Technol., Rochester, NY, USA]"],"affiliations":[{"raw_affiliation_string":"Department of Computer Engineering, Rochester Institute of Technology, Rochester, NY, USA","institution_ids":["https://openalex.org/I155173764"]},{"raw_affiliation_string":"[Dept. of Comput. Eng., Rochester Inst. of Technol., Rochester, NY, USA]","institution_ids":["https://openalex.org/I155173764"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102940391","display_name":"Sonia Mart\u00edn L\u00f3pez","orcid":"https://orcid.org/0000-0002-4308-5572"},"institutions":[{"id":"https://openalex.org/I155173764","display_name":"Rochester Institute of Technology","ror":"https://ror.org/00v4yb702","country_code":"US","type":"education","lineage":["https://openalex.org/I155173764"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sonia Lopez","raw_affiliation_strings":["Department of Computer Engineering, Rochester Institute of Technology, Rochester, NY, USA","[Dept. of Comput. Eng., Rochester Inst. of Technol., Rochester, NY, USA]"],"affiliations":[{"raw_affiliation_string":"Department of Computer Engineering, Rochester Institute of Technology, Rochester, NY, USA","institution_ids":["https://openalex.org/I155173764"]},{"raw_affiliation_string":"[Dept. of Comput. Eng., Rochester Inst. of Technol., Rochester, NY, USA]","institution_ids":["https://openalex.org/I155173764"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071708090","display_name":"Marcin \u0141ukowiak","orcid":null},"institutions":[{"id":"https://openalex.org/I155173764","display_name":"Rochester Institute of Technology","ror":"https://ror.org/00v4yb702","country_code":"US","type":"education","lineage":["https://openalex.org/I155173764"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Marcin Lukowiak","raw_affiliation_strings":["[Dept. of Comput. Eng., Rochester Inst. of Technol., Rochester, NY, USA]"],"affiliations":[{"raw_affiliation_string":"[Dept. of Comput. Eng., Rochester Inst. of Technol., Rochester, NY, USA]","institution_ids":["https://openalex.org/I155173764"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063365925","display_name":"James Letendre","orcid":null},"institutions":[{"id":"https://openalex.org/I155173764","display_name":"Rochester Institute of Technology","ror":"https://ror.org/00v4yb702","country_code":"US","type":"education","lineage":["https://openalex.org/I155173764"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"James Letendre","raw_affiliation_strings":["Department of Computer Engineering, Rochester Institute of Technology, Rochester, NY, USA","[Dept. of Comput. Eng., Rochester Inst. of Technol., Rochester, NY, USA]"],"affiliations":[{"raw_affiliation_string":"Department of Computer Engineering, Rochester Institute of Technology, Rochester, NY, USA","institution_ids":["https://openalex.org/I155173764"]},{"raw_affiliation_string":"[Dept. of Comput. Eng., Rochester Inst. of Technol., Rochester, NY, USA]","institution_ids":["https://openalex.org/I155173764"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5084786983","display_name":"David Gasser","orcid":null},"institutions":[{"id":"https://openalex.org/I155173764","display_name":"Rochester Institute of Technology","ror":"https://ror.org/00v4yb702","country_code":"US","type":"education","lineage":["https://openalex.org/I155173764"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"David Gasser","raw_affiliation_strings":["Department of Computer Engineering, Rochester Institute of Technology, Rochester, NY, USA","[Dept. of Comput. Eng., Rochester Inst. of Technol., Rochester, NY, USA]"],"affiliations":[{"raw_affiliation_string":"Department of Computer Engineering, Rochester Institute of Technology, Rochester, NY, USA","institution_ids":["https://openalex.org/I155173764"]},{"raw_affiliation_string":"[Dept. of Comput. Eng., Rochester Inst. of Technol., Rochester, NY, USA]","institution_ids":["https://openalex.org/I155173764"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5068953697"],"corresponding_institution_ids":["https://openalex.org/I155173764"],"apc_list":null,"apc_paid":null,"fwci":1.2608,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.80608874,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"273","last_page":"276"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11697","display_name":"Numerical Methods and Algorithms","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10363","display_name":"Low-power high-performance VLSI design","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.858634352684021},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.7862334251403809},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.7319213151931763},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7113626003265381},{"id":"https://openalex.org/keywords/memory-bandwidth","display_name":"Memory bandwidth","score":0.632180392742157},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.584367036819458},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.5405274033546448},{"id":"https://openalex.org/keywords/parallelism","display_name":"Parallelism (grammar)","score":0.5293113589286804},{"id":"https://openalex.org/keywords/numerical-linear-algebra","display_name":"Numerical linear algebra","score":0.502744197845459},{"id":"https://openalex.org/keywords/bandwidth","display_name":"Bandwidth (computing)","score":0.5021069049835205},{"id":"https://openalex.org/keywords/linear-algebra","display_name":"Linear algebra","score":0.4949638247489929},{"id":"https://openalex.org/keywords/clock-rate","display_name":"Clock rate","score":0.46113526821136475},{"id":"https://openalex.org/keywords/computational-complexity-theory","display_name":"Computational complexity theory","score":0.4561225473880768},{"id":"https://openalex.org/keywords/central-processing-unit","display_name":"Central processing unit","score":0.4365715980529785},{"id":"https://openalex.org/keywords/symmetric-multiprocessor-system","display_name":"Symmetric multiprocessor system","score":0.4222722053527832},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.34586501121520996},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.33231422305107117},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.2917228043079376},{"id":"https://openalex.org/keywords/linear-system","display_name":"Linear system","score":0.2808973789215088},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.20689606666564941}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.858634352684021},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.7862334251403809},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.7319213151931763},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7113626003265381},{"id":"https://openalex.org/C188045654","wikidata":"https://www.wikidata.org/wiki/Q17148339","display_name":"Memory bandwidth","level":2,"score":0.632180392742157},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.584367036819458},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.5405274033546448},{"id":"https://openalex.org/C2781172179","wikidata":"https://www.wikidata.org/wiki/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.5293113589286804},{"id":"https://openalex.org/C163834973","wikidata":"https://www.wikidata.org/wiki/Q2004891","display_name":"Numerical linear algebra","level":3,"score":0.502744197845459},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.5021069049835205},{"id":"https://openalex.org/C139352143","wikidata":"https://www.wikidata.org/wiki/Q82571","display_name":"Linear algebra","level":2,"score":0.4949638247489929},{"id":"https://openalex.org/C178693496","wikidata":"https://www.wikidata.org/wiki/Q911691","display_name":"Clock rate","level":3,"score":0.46113526821136475},{"id":"https://openalex.org/C179799912","wikidata":"https://www.wikidata.org/wiki/Q205084","display_name":"Computational complexity theory","level":2,"score":0.4561225473880768},{"id":"https://openalex.org/C49154492","wikidata":"https://www.wikidata.org/wiki/Q5300","display_name":"Central processing unit","level":2,"score":0.4365715980529785},{"id":"https://openalex.org/C172430144","wikidata":"https://www.wikidata.org/wiki/Q17111997","display_name":"Symmetric multiprocessor system","level":2,"score":0.4222722053527832},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.34586501121520996},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.33231422305107117},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.2917228043079376},{"id":"https://openalex.org/C6802819","wikidata":"https://www.wikidata.org/wiki/Q1072174","display_name":"Linear system","level":2,"score":0.2808973789215088},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.20689606666564941},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C165005293","wikidata":"https://www.wikidata.org/wiki/Q1074500","display_name":"Chip","level":2,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/asap.2013.6567589","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asap.2013.6567589","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2013 IEEE 24th International Conference on Application-Specific Systems, Architectures and Processors","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":13,"referenced_works":["https://openalex.org/W53102941","https://openalex.org/W1673543698","https://openalex.org/W2045289186","https://openalex.org/W2064712527","https://openalex.org/W2093472761","https://openalex.org/W2103371021","https://openalex.org/W2132367502","https://openalex.org/W2138336319","https://openalex.org/W2143828148","https://openalex.org/W2145055833","https://openalex.org/W2271934614","https://openalex.org/W2545224791","https://openalex.org/W6694197373"],"related_works":["https://openalex.org/W4318187686","https://openalex.org/W4386322366","https://openalex.org/W4294069467","https://openalex.org/W4200045707","https://openalex.org/W2330590072","https://openalex.org/W2350289853","https://openalex.org/W2546451654","https://openalex.org/W2593305490","https://openalex.org/W2224054022","https://openalex.org/W2381065588"],"abstract_inverted_index":{"One":[0],"of":[1,5,58,70,97,109,147,167],"the":[2,11,15,20,56,122,129,145],"main":[3],"challenges":[4],"using":[6],"heterogeneous":[7,162],"systems":[8,178],"results":[9,113,156],"from":[10],"need":[12],"to":[13,54,116,119,169,174],"find":[14],"computation-to-hardware":[16],"assignments":[17,150],"that":[18,28,84,158],"maximize":[19],"overall":[21],"application":[22,138],"performance.":[23,153],"The":[24,112],"important":[25],"computational":[26,72,130],"factors":[27],"must":[29],"be":[30],"taken":[31],"into":[32],"account":[33],"include":[34],"algorithmic":[35],"complexity,":[36],"exploitable":[37],"parallelism,":[38],"memory":[39,75],"bandwidth":[40],"requirements,":[41],"and":[42,74,93,102,171,176],"data":[43,110],"size.":[44],"To":[45],"achieve":[46],"high":[47],"performance,":[48],"a":[49,59,106,133,140,159,165],"hardware":[50,125],"platform":[51,126],"is":[52],"chosen":[53],"satisfy":[55],"needs":[57],"computation":[60,149],"with":[61],"corresponding":[62],"architectural":[63],"features":[64],"such":[65],"as":[66,139],"clock":[67],"speed,":[68],"number":[69],"parallel":[71],"units,":[73],"bandwidth.":[76],"In":[77],"this":[78],"paper":[79],"five":[80],"linear":[81],"algebra":[82],"computations":[83],"are":[85,91,114],"commonly":[86],"found":[87],"in":[88,95],"compute-intensive":[89],"applications":[90],"selected":[92],"evaluated":[94],"terms":[96],"performance":[98],"on":[99,128],"CPU,":[100],"GPU,":[101],"FPGA":[103],"platforms":[104],"across":[105],"wide":[107],"range":[108],"sizes.":[111],"used":[115],"provide":[117],"guidelines":[118],"help":[120],"select":[121],"best":[123],"performing":[124],"based":[127],"factors.":[131],"Using":[132],"cutting":[134],"edge":[135],"signal":[136],"processing":[137],"case":[141],"study,":[142],"we":[143],"demonstrate":[144],"importance":[146],"making":[148],"for":[151],"improved":[152],"Our":[154],"experimental":[155],"show":[157],"properly":[160],"implemented":[161],"system":[163],"achieves":[164],"speedup":[166],"up":[168],"39x":[170],"3.8x":[172],"compared":[173],"CPU-only":[175],"GPU-only":[177],"respectively.":[179]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2017,"cited_by_count":1},{"year":2015,"cited_by_count":1},{"year":2014,"cited_by_count":2},{"year":2013,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
