{"id":"https://openalex.org/W4413278499","doi":"https://doi.org/10.1109/icfpt64416.2024.11113395","title":"FLUD: A Scalable and Configurable Systolic Array Design for LU Decomposition on FPGAs","display_name":"FLUD: A Scalable and Configurable Systolic Array Design for LU Decomposition on FPGAs","publication_year":2024,"publication_date":"2024-12-10","ids":{"openalex":"https://openalex.org/W4413278499","doi":"https://doi.org/10.1109/icfpt64416.2024.11113395"},"language":"en","primary_location":{"id":"doi:10.1109/icfpt64416.2024.11113395","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icfpt64416.2024.11113395","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 International Conference on Field Programmable Technology (ICFPT)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5047352513","display_name":"Xingyu Tian","orcid":"https://orcid.org/0000-0001-6244-2101"},"institutions":[{"id":"https://openalex.org/I18014758","display_name":"Simon Fraser University","ror":"https://ror.org/0213rcc28","country_code":"CA","type":"education","lineage":["https://openalex.org/I18014758"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Xingyu Tian","raw_affiliation_strings":["Simon Fraser University"],"affiliations":[{"raw_affiliation_string":"Simon Fraser University","institution_ids":["https://openalex.org/I18014758"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100611279","display_name":"Geng Yang","orcid":"https://orcid.org/0000-0001-6921-1007"},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Geng Yang","raw_affiliation_strings":["Xidian University"],"affiliations":[{"raw_affiliation_string":"Xidian University","institution_ids":["https://openalex.org/I149594827"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5107843533","display_name":"Zhenman Fang","orcid":null},"institutions":[{"id":"https://openalex.org/I18014758","display_name":"Simon Fraser University","ror":"https://ror.org/0213rcc28","country_code":"CA","type":"education","lineage":["https://openalex.org/I18014758"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Zhenman Fang","raw_affiliation_strings":["Simon Fraser University"],"affiliations":[{"raw_affiliation_string":"Simon Fraser University","institution_ids":["https://openalex.org/I18014758"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5047352513"],"corresponding_institution_ids":["https://openalex.org/I18014758"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.35158943,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"01","last_page":"09"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9883999824523926,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9883999824523926,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11032","display_name":"VLSI and Analog Circuit Testing","score":0.9204999804496765,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11034","display_name":"Digital Filter Design and Implementation","score":0.906000018119812,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/systolic-array","display_name":"Systolic array","score":0.8106604814529419},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.7609845399856567},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6983653903007507},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6393315196037292},{"id":"https://openalex.org/keywords/decomposition","display_name":"Decomposition","score":0.5831722617149353},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5766478776931763},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.4742935001850128},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.27388685941696167},{"id":"https://openalex.org/keywords/very-large-scale-integration","display_name":"Very-large-scale integration","score":0.17960655689239502},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.07804268598556519},{"id":"https://openalex.org/keywords/chemistry","display_name":"Chemistry","score":0.06106248497962952}],"concepts":[{"id":"https://openalex.org/C150741067","wikidata":"https://www.wikidata.org/wiki/Q2377218","display_name":"Systolic array","level":3,"score":0.8106604814529419},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.7609845399856567},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6983653903007507},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6393315196037292},{"id":"https://openalex.org/C124681953","wikidata":"https://www.wikidata.org/wiki/Q339062","display_name":"Decomposition","level":2,"score":0.5831722617149353},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5766478776931763},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.4742935001850128},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.27388685941696167},{"id":"https://openalex.org/C14580979","wikidata":"https://www.wikidata.org/wiki/Q876049","display_name":"Very-large-scale integration","level":2,"score":0.17960655689239502},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.07804268598556519},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.06106248497962952},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icfpt64416.2024.11113395","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icfpt64416.2024.11113395","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 International Conference on Field Programmable Technology (ICFPT)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5699999928474426,"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":16,"referenced_works":["https://openalex.org/W2053452553","https://openalex.org/W2097503653","https://openalex.org/W2107971623","https://openalex.org/W2136001068","https://openalex.org/W2147282769","https://openalex.org/W2152320213","https://openalex.org/W2794478543","https://openalex.org/W2885289863","https://openalex.org/W3138530731","https://openalex.org/W4210859512","https://openalex.org/W4252821989","https://openalex.org/W4312258136","https://openalex.org/W4313894250","https://openalex.org/W4320800818","https://openalex.org/W4378805959","https://openalex.org/W4386830976"],"related_works":["https://openalex.org/W4240320454","https://openalex.org/W2070314832","https://openalex.org/W2395557210","https://openalex.org/W2111241003","https://openalex.org/W2010232134","https://openalex.org/W2347854075","https://openalex.org/W1967938402","https://openalex.org/W2132614232","https://openalex.org/W2386041993","https://openalex.org/W1608572506"],"abstract_inverted_index":{"Lower-upper":[0],"decomposition":[1],"(LUD)":[2],"is":[3,146,292],"one":[4,47],"of":[5,50,169,184,194,222,278],"the":[6,51,114,122,192,214,220,246,259,305],"most":[7],"popular":[8],"matrix":[9,67,128],"factorization":[10],"techniques":[11],"in":[12,20,250],"linear":[13],"algebra":[14],"and":[15,23,82,130,135,152,180,262,322],"has":[16],"been":[17],"widely":[18],"used":[19],"many":[21],"scientific":[22],"engineering":[24],"applications.":[25],"While":[26],"prior":[27],"studies":[28],"have":[29],"investigated":[30],"various":[31],"strategies":[32],"to":[33,59,79,98,116,125,154,172,211,227,243,304],"accelerate":[34,117,126],"block":[35,118],"LUD":[36,150,199,224],"on":[37,113,233,266,309],"FPGAs":[38],"for":[39,65,287],"arbitrary":[40,155],"input":[41,156],"sizes,":[42],"they":[43],"often":[44],"suffer":[45],"from":[46],"or":[48,92],"more":[49],"following":[52],"limitations:":[53],"1)":[54],"excessive":[55,74],"resource":[56,163,195],"utilization":[57],"due":[58,78,97],"separate":[60],"PE":[61,190],"(processing":[62],"element)":[63],"designs":[64],"different":[66,127,149,198,223],"blocks":[68,129,151],"with":[69,177],"diverse":[70],"computation":[71,209],"patterns;":[72],"2)":[73],"on-chip":[75],"memory":[76],"usage":[77],"buffer-based":[80],"designs;":[81],"3)":[83],"insufficient":[84],"parallelism":[85,89,134],"as":[86],"only":[87],"one-level":[88],"(either":[90],"row-level":[91],"iteration-level":[93],"parallelism)":[94],"was":[95],"exploited":[96],"complex":[99],"dependencies.":[100],"To":[101,158],"address":[102],"those":[103],"limitations,":[104],"we":[105,238],"propose":[106],"FLUD,":[107],"a":[108,141,167,202,208,234,275,310],"streamingbased":[109],"systolic":[110,123,143,216],"array":[111,124,144,217],"design":[112,218,230,249,260],"EPGA":[115],"LUD,":[119,290],"which":[120,291],"shares":[121],"exploits":[131],"both":[132],"column-level":[133],"iterationlevel":[136],"parallelism.":[137],"First,":[138],"FLUD":[139,165,206,248,273,317],"implements":[140],"configurable":[142],"that":[145,272],"shared":[147],"by":[148],"scalable":[153],"sizes.":[157],"further":[159],"optimize":[160],"its":[161],"hardware":[162],"efficiency,":[164],"groups":[166],"column":[168],"PEs":[170],"together":[171],"replace":[173],"their":[174,267],"FIFO":[175],"connections":[176],"lightweight":[178],"registers":[179],"reduce":[181],"multiple":[182],"copies":[183],"local":[185],"control":[186],"logic":[187],"inside":[188],"each":[189],"(for":[191],"purpose":[193],"sharing":[196],"among":[197,219],"blocks)":[200],"into":[201],"global":[203],"one.":[204],"Moreover,":[205],"devises":[207],"schedule":[210],"effectively":[212],"share":[213],"highly-optimized":[215],"execution":[221],"blocks.":[225],"Lastly,":[226],"enable":[228],"fast":[229],"space":[231],"exploration":[232],"given":[235],"FPGA":[236,301],"platform,":[237],"develop":[239],"an":[240],"automation":[241],"tool":[242],"automatically":[244],"generate":[245],"optimized":[247],"Vitis":[251],"high-level":[252],"synthesis":[253],"(HLS),":[254],"where":[255],"users":[256],"can":[257],"configure":[258],"size":[261],"data":[263],"precision":[264],"based":[265],"needs.":[268],"Experimental":[269],"results":[270],"demonstrate":[271],"achieves":[274,318],"peak":[276],"throughput":[277,321],"<tex":[279,294,323],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[280,295,324],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$\\mathbf{4":[281],"2":[282],"7.":[283],"9":[284],"5}$</tex>":[285],"GFLOPS":[286],"single-precision":[288],"floatingpoint":[289],"about":[293],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$3":[296],"\\times$</tex>":[297],"faster":[298],"than":[299],"state-of-the-art":[300],"design.":[302],"Compared":[303],"LAPACK":[306],"library":[307],"running":[308],"12":[311],"-core":[312],"Xeon":[313],"Silver":[314],"4214":[315],"CPU,":[316],"4.71x":[319],"higher":[320],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$10.25":[325],"x$</tex>":[326],"better":[327],"throughput/watt.":[328]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
