{"id":"https://openalex.org/W1982009514","doi":"https://doi.org/10.1109/asap.2014.6868658","title":"A scalable and compact systolic architecture for linear solvers","display_name":"A scalable and compact systolic architecture for linear solvers","publication_year":2014,"publication_date":"2014-06-01","ids":{"openalex":"https://openalex.org/W1982009514","doi":"https://doi.org/10.1109/asap.2014.6868658","mag":"1982009514"},"language":"en","primary_location":{"id":"doi:10.1109/asap.2014.6868658","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asap.2014.6868658","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2014 IEEE 25th International Conference on Application-Specific Systems, Architectures and Processors","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5042242910","display_name":"Kevin Shen Hoong Ong","orcid":"https://orcid.org/0000-0002-1611-7612"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":true,"raw_author_name":"Kevin S. H. Ong","raw_affiliation_strings":["Nanyang Technological University, Singapore, Singapore, SG","Sch. of Comput. Eng., Nanyang Technol. Univ., Singapore, , Singapore"],"affiliations":[{"raw_affiliation_string":"Nanyang Technological University, Singapore, Singapore, SG","institution_ids":["https://openalex.org/I172675005"]},{"raw_affiliation_string":"Sch. of Comput. Eng., Nanyang Technol. Univ., Singapore, , Singapore","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032556461","display_name":"Suhaib A. Fahmy","orcid":"https://orcid.org/0000-0003-0568-5048"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Suhaib A. Fahmy","raw_affiliation_strings":["Nanyang Technological University, Singapore, Singapore, SG","Sch. of Comput. Eng., Nanyang Technol. Univ., Singapore, , Singapore"],"affiliations":[{"raw_affiliation_string":"Nanyang Technological University, Singapore, Singapore, SG","institution_ids":["https://openalex.org/I172675005"]},{"raw_affiliation_string":"Sch. of Comput. Eng., Nanyang Technol. Univ., Singapore, , Singapore","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5017456876","display_name":"Keck Voon Ling","orcid":"https://orcid.org/0000-0002-9293-9394"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Keck-Voon Ling","raw_affiliation_strings":["Nanyang Technological University, Singapore, Singapore, SG","Sch. of Electr. & Electron. Eng., Nanyang Technol. Univ., Singapore, , Singapore"],"affiliations":[{"raw_affiliation_string":"Nanyang Technological University, Singapore, Singapore, SG","institution_ids":["https://openalex.org/I172675005"]},{"raw_affiliation_string":"Sch. of Electr. & Electron. Eng., Nanyang Technol. Univ., Singapore, , Singapore","institution_ids":["https://openalex.org/I172675005"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5042242910"],"corresponding_institution_ids":["https://openalex.org/I172675005"],"apc_list":null,"apc_paid":null,"fwci":0.4187,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.66022361,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"186","last_page":"187"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10363","display_name":"Low-power high-performance VLSI design","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10363","display_name":"Low-power high-performance VLSI design","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9957000017166138,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.8228623867034912},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.7937596440315247},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6927276253700256},{"id":"https://openalex.org/keywords/solver","display_name":"Solver","score":0.688962996006012},{"id":"https://openalex.org/keywords/systolic-array","display_name":"Systolic array","score":0.6867349147796631},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6468424201011658},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.5985984206199646},{"id":"https://openalex.org/keywords/linear-system","display_name":"Linear system","score":0.5538243651390076},{"id":"https://openalex.org/keywords/block-size","display_name":"Block size","score":0.5504287481307983},{"id":"https://openalex.org/keywords/lu-decomposition","display_name":"LU decomposition","score":0.5261670351028442},{"id":"https://openalex.org/keywords/throughput","display_name":"Throughput","score":0.5255062580108643},{"id":"https://openalex.org/keywords/system-of-linear-equations","display_name":"System of linear equations","score":0.5184330940246582},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.49528804421424866},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.2647101581096649},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.23887863755226135},{"id":"https://openalex.org/keywords/matrix-decomposition","display_name":"Matrix decomposition","score":0.21201127767562866},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1561213731765747},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.125440776348114}],"concepts":[{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.8228623867034912},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.7937596440315247},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6927276253700256},{"id":"https://openalex.org/C2778770139","wikidata":"https://www.wikidata.org/wiki/Q1966904","display_name":"Solver","level":2,"score":0.688962996006012},{"id":"https://openalex.org/C150741067","wikidata":"https://www.wikidata.org/wiki/Q2377218","display_name":"Systolic array","level":3,"score":0.6867349147796631},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6468424201011658},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.5985984206199646},{"id":"https://openalex.org/C6802819","wikidata":"https://www.wikidata.org/wiki/Q1072174","display_name":"Linear system","level":2,"score":0.5538243651390076},{"id":"https://openalex.org/C41431624","wikidata":"https://www.wikidata.org/wiki/Q1053357","display_name":"Block size","level":3,"score":0.5504287481307983},{"id":"https://openalex.org/C123213974","wikidata":"https://www.wikidata.org/wiki/Q833089","display_name":"LU decomposition","level":4,"score":0.5261670351028442},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.5255062580108643},{"id":"https://openalex.org/C94523830","wikidata":"https://www.wikidata.org/wiki/Q11203","display_name":"System of linear equations","level":2,"score":0.5184330940246582},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.49528804421424866},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.2647101581096649},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.23887863755226135},{"id":"https://openalex.org/C42355184","wikidata":"https://www.wikidata.org/wiki/Q1361088","display_name":"Matrix decomposition","level":3,"score":0.21201127767562866},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1561213731765747},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.125440776348114},{"id":"https://openalex.org/C158693339","wikidata":"https://www.wikidata.org/wiki/Q190524","display_name":"Eigenvalues and eigenvectors","level":2,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C14580979","wikidata":"https://www.wikidata.org/wiki/Q876049","display_name":"Very-large-scale integration","level":2,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C555944384","wikidata":"https://www.wikidata.org/wiki/Q249","display_name":"Wireless","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/asap.2014.6868658","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asap.2014.6868658","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2014 IEEE 25th International Conference on Application-Specific Systems, Architectures and Processors","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.4300000071525574,"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":9,"referenced_works":["https://openalex.org/W31028703","https://openalex.org/W848332206","https://openalex.org/W1974866421","https://openalex.org/W2010360826","https://openalex.org/W2023406772","https://openalex.org/W2063430487","https://openalex.org/W2091059698","https://openalex.org/W2107971525","https://openalex.org/W4250027757"],"related_works":["https://openalex.org/W2069287101","https://openalex.org/W2500993308","https://openalex.org/W3109407162","https://openalex.org/W4293832276","https://openalex.org/W2962718424","https://openalex.org/W1999760939","https://openalex.org/W4298343039","https://openalex.org/W2108061486","https://openalex.org/W1983900774","https://openalex.org/W3195219148"],"abstract_inverted_index":{"We":[0],"present":[1],"a":[2,11,30,41,49,87,101,103,113,122],"scalable":[3,136],"design":[4,83,137],"for":[5,59,72],"accelerating":[6],"the":[7,135],"problem":[8],"of":[9,15,51,61,74,106,124],"solving":[10],"dense":[12],"linear":[13,55,68,104],"system":[14,105],"equations":[16],"using":[17],"LU":[18],"Decomposition.":[19],"A":[20],"novel":[21],"systolic":[22],"array":[23],"architecture":[24],"that":[25],"can":[26,109,138],"be":[27,110,139],"used":[28],"as":[29],"building":[31],"block":[32],"in":[33,90],"scientific":[34],"applications":[35],"is":[36],"described":[37],"and":[38,64,126],"prototyped":[39],"on":[40,112],"Xilinx":[42],"Virtex":[43],"6":[44],"FPGA.":[45],"This":[46],"solver":[47],"has":[48],"throughput":[50],"around":[52,65],"3.2":[53],"million":[54],"systems":[56,69],"per":[57,70],"second":[58,71],"matrices":[60,73],"size":[62,75,107,123],"N=4":[63],"80":[66],"thousand":[67],"N=16.":[76],"In":[77],"comparison":[78],"with":[79,145],"similar":[80],"work,":[81],"our":[82],"offers":[84],"up":[85,94],"to":[86,95,121,128,132,141],"12-fold":[88],"improvement":[89],"speed":[91],"whilst":[92],"requiring":[93],"50%":[96],"less":[97],"hardware":[98],"resources.":[99],"As":[100],"result,":[102],"N=64":[108],"implemented":[111],"single":[114],"FPGA,":[115],"whereas":[116],"previous":[117],"work":[118],"was":[119],"limited":[120],"N=12":[125],"resorted":[127],"complex":[129],"multi-FPGA":[130],"architectures":[131],"scale.":[133],"Finally,":[134],"adapted":[140],"different":[142],"sized":[143],"problems":[144],"minimum":[146],"effort.":[147]},"counts_by_year":[{"year":2014,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
