{"id":"https://openalex.org/W2022304554","doi":"https://doi.org/10.1109/fccm.2014.26","title":"Breaking Sequential Dependencies in FPGA-Based Sparse LU Factorization","display_name":"Breaking Sequential Dependencies in FPGA-Based Sparse LU Factorization","publication_year":2014,"publication_date":"2014-05-01","ids":{"openalex":"https://openalex.org/W2022304554","doi":"https://doi.org/10.1109/fccm.2014.26","mag":"2022304554"},"language":"en","primary_location":{"id":"doi:10.1109/fccm.2014.26","is_oa":false,"landing_page_url":"https://doi.org/10.1109/fccm.2014.26","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2014 IEEE 22nd Annual International Symposium on Field-Programmable Custom Computing Machines","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://dr.ntu.edu.sg/bitstream/10356/81075/1/Breaking%20Sequential%20Dependencies%20in%20FPGA-Based%20Sparse%20LU%20Factorization.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101990451","display_name":"Siddhartha","orcid":null},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":true,"raw_author_name":"Siddhartha","raw_affiliation_strings":["Nanyang Technological University, Singapore","Nanyang Tech. Univ., Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"Nanyang Technological University, Singapore","institution_ids":["https://openalex.org/I172675005"]},{"raw_affiliation_string":"Nanyang Tech. Univ., Singapore, Singapore","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5015534628","display_name":"Nachiket Kapre","orcid":"https://orcid.org/0000-0002-2187-0406"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Nachiket Kapre","raw_affiliation_strings":["Nanyang Technological University, Singapore","Nanyang Tech. Univ., Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"Nanyang Technological University, Singapore","institution_ids":["https://openalex.org/I172675005"]},{"raw_affiliation_string":"Nanyang Tech. Univ., Singapore, Singapore","institution_ids":["https://openalex.org/I172675005"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5101990451"],"corresponding_institution_ids":["https://openalex.org/I172675005"],"apc_list":null,"apc_paid":null,"fwci":1.5324,"has_fulltext":true,"cited_by_count":6,"citation_normalized_percentile":{"value":0.82286677,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"60","last_page":"63"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10363","display_name":"Low-power high-performance VLSI design","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8223622441291809},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7656330466270447},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.7075290679931641},{"id":"https://openalex.org/keywords/dataflow","display_name":"Dataflow","score":0.6843474507331848},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.6143422722816467},{"id":"https://openalex.org/keywords/factorization","display_name":"Factorization","score":0.5136422514915466},{"id":"https://openalex.org/keywords/sparse-matrix","display_name":"Sparse matrix","score":0.5087111592292786},{"id":"https://openalex.org/keywords/multi-core-processor","display_name":"Multi-core processor","score":0.5075501203536987},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.44744426012039185},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.41354861855506897},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.3441683053970337},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.2937735319137573}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8223622441291809},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7656330466270447},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.7075290679931641},{"id":"https://openalex.org/C96324660","wikidata":"https://www.wikidata.org/wiki/Q205446","display_name":"Dataflow","level":2,"score":0.6843474507331848},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.6143422722816467},{"id":"https://openalex.org/C187834632","wikidata":"https://www.wikidata.org/wiki/Q188804","display_name":"Factorization","level":2,"score":0.5136422514915466},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.5087111592292786},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.5075501203536987},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.44744426012039185},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.41354861855506897},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3441683053970337},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2937735319137573},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.0},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/fccm.2014.26","is_oa":false,"landing_page_url":"https://doi.org/10.1109/fccm.2014.26","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2014 IEEE 22nd Annual International Symposium on Field-Programmable Custom Computing Machines","raw_type":"proceedings-article"},{"id":"pmh:oai:dr.ntu.edu.sg:10356/81075","is_oa":true,"landing_page_url":"http://hdl.handle.net/10220/39139","pdf_url":"https://dr.ntu.edu.sg/bitstream/10356/81075/1/Breaking%20Sequential%20Dependencies%20in%20FPGA-Based%20Sparse%20LU%20Factorization.pdf","source":{"id":"https://openalex.org/S4306402609","display_name":"DR-NTU (Nanyang Technological University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I172675005","host_organization_name":"Nanyang Technological University","host_organization_lineage":["https://openalex.org/I172675005"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Conference Paper"},{"id":"mag:2022304554","is_oa":false,"landing_page_url":"https://dr.ntu.edu.sg/bitstream/10356/81075/1/Breaking%20Sequential%20Dependencies%20in%20FPGA-Based%20Sparse%20LU%20Factorization.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306418418","display_name":"Field-Programmable Custom Computing Machines","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":"Field-Programmable Custom Computing Machines","raw_type":null}],"best_oa_location":{"id":"pmh:oai:dr.ntu.edu.sg:10356/81075","is_oa":true,"landing_page_url":"http://hdl.handle.net/10220/39139","pdf_url":"https://dr.ntu.edu.sg/bitstream/10356/81075/1/Breaking%20Sequential%20Dependencies%20in%20FPGA-Based%20Sparse%20LU%20Factorization.pdf","source":{"id":"https://openalex.org/S4306402609","display_name":"DR-NTU (Nanyang Technological University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I172675005","host_organization_name":"Nanyang Technological University","host_organization_lineage":["https://openalex.org/I172675005"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Conference Paper"},"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9","score":0.5099999904632568}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2022304554.pdf","grobid_xml":"https://content.openalex.org/works/W2022304554.grobid-xml"},"referenced_works_count":8,"referenced_works":["https://openalex.org/W1970009422","https://openalex.org/W1977343658","https://openalex.org/W2041999884","https://openalex.org/W2051917325","https://openalex.org/W2070571481","https://openalex.org/W2118076222","https://openalex.org/W2160106215","https://openalex.org/W6683401623"],"related_works":["https://openalex.org/W2208934446","https://openalex.org/W2444544770","https://openalex.org/W3160040224","https://openalex.org/W3016904661","https://openalex.org/W1971891790","https://openalex.org/W2112865524","https://openalex.org/W2261685355","https://openalex.org/W2518567779","https://openalex.org/W2489934651","https://openalex.org/W2093056394","https://openalex.org/W2134237243","https://openalex.org/W1974610639","https://openalex.org/W1967946179","https://openalex.org/W2160939887","https://openalex.org/W2963371212","https://openalex.org/W2966582152","https://openalex.org/W3006573990","https://openalex.org/W3106161546","https://openalex.org/W1597444280","https://openalex.org/W2949783914"],"abstract_inverted_index":{"Substitution,":[0],"and":[1,56,59,89,131],"reassociation":[2,90],"of":[3,35,91,115,119,144],"irregular":[4,121],"sparse":[5,41,126],"LU":[6,42,45,127],"factorization":[7,46],"can":[8],"deliver":[9],"up":[10],"to":[11,63,66,80,139,154],"31%":[12],"additional":[13,136],"speedup":[14],"over":[15,39],"an":[16],"existing":[17,54],"state-of-the-art":[18,30],"parallel":[19,117],"FPGA":[20],"implementation":[21,31,104],"where":[22],"further":[23],"parallelization":[24],"was":[25],"deemed":[26],"virtually":[27],"impossible.":[28],"The":[29],"is":[32,47,60,96,101],"already":[33],"capable":[34,114],"delivering":[36],"3\u00d7":[37],"acceleration":[38],"CPU-based":[40],"solvers.":[43],"Sparse":[44],"a":[48,97],"well-known":[49],"computational":[50],"bottleneck":[51],"in":[52,70,135,150],"many":[53],"scientific":[55],"engineering":[57],"applications":[58],"notoriously":[61],"hard":[62],"parallelize":[64],"due":[65,138],"inherent":[67,84],"sequential":[68],"dependencies":[69,85],"the":[71,92,125,133,151],"computation":[72],"graph.":[73],"In":[74],"this":[75],"paper,":[76],"we":[77],"show":[78],"how":[79],"break":[81],"these":[82],"alleged":[83],"using":[86],"depth-limited":[87],"substitution,":[88],"resulting":[93],"computation.":[94,128],"This":[95],"work-parallelism":[98],"tradeoff":[99],"that":[100],"well-suited":[102],"for":[103],"on":[105],"FPGA-based":[106],"token":[107],"dataflow":[108],"architectures.":[109],"Such":[110],"compute":[111,157],"organizations":[112],"are":[113],"fast":[116],"processing":[118],"large":[120],"graphs":[122,153],"extracted":[123],"from":[124],"We":[129,147],"manage":[130],"control":[132],"growth":[134],"work":[137],"substitution":[140,145],"through":[141],"careful":[142],"selection":[143],"depth.":[146],"exploit":[148],"associativity":[149],"generated":[152],"restructure":[155],"long":[156],"chains":[158],"into":[159],"reduction":[160],"trees.":[161]},"counts_by_year":[{"year":2021,"cited_by_count":1},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":1},{"year":2015,"cited_by_count":1},{"year":2014,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
