{"id":"https://openalex.org/W2560977807","doi":"https://doi.org/10.1109/tpds.2018.2803820","title":"Efficient Realization of Householder Transform Through Algorithm-Architecture Co-Design for Acceleration of QR Factorization","display_name":"Efficient Realization of Householder Transform Through Algorithm-Architecture Co-Design for Acceleration of QR Factorization","publication_year":2018,"publication_date":"2018-02-07","ids":{"openalex":"https://openalex.org/W2560977807","doi":"https://doi.org/10.1109/tpds.2018.2803820","mag":"2560977807"},"language":"en","primary_location":{"id":"doi:10.1109/tpds.2018.2803820","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2018.2803820","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1612.04470","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Farhad Merchant","orcid":"https://orcid.org/0000-0002-3708-5621"},"institutions":[{"id":"https://openalex.org/I887968799","display_name":"RWTH Aachen University","ror":"https://ror.org/04xfq0f34","country_code":"DE","type":"education","lineage":["https://openalex.org/I887968799"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Farhad Merchant","raw_affiliation_strings":["Institute for Communication Technologies and Embedded Systems, RWTH Aachen University, Aachen, Germany"],"raw_orcid":"https://orcid.org/0000-0002-3708-5621","affiliations":[{"raw_affiliation_string":"Institute for Communication Technologies and Embedded Systems, RWTH Aachen University, Aachen, Germany","institution_ids":["https://openalex.org/I887968799"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Tarun Vatwani","orcid":null},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Tarun Vatwani","raw_affiliation_strings":["School of Computer Science and Engineering, Nanyang Technological University, Singapore"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Nanyang Technological University, Singapore","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Anupam Chattopadhyay","orcid":"https://orcid.org/0000-0002-8818-6983"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Anupam Chattopadhyay","raw_affiliation_strings":["School of Computer Science and Engineering, Nanyang Technological University, Singapore"],"raw_orcid":"https://orcid.org/0000-0002-8818-6983","affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Nanyang Technological University, Singapore","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Soumyendu Raha","orcid":null},"institutions":[{"id":"https://openalex.org/I59270414","display_name":"Indian Institute of Science Bangalore","ror":"https://ror.org/04dese585","country_code":"IN","type":"education","lineage":["https://openalex.org/I59270414"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Soumyendu Raha","raw_affiliation_strings":["Indian Institute of Science, Bangalore, Karnataka, India"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Indian Institute of Science, Bangalore, Karnataka, India","institution_ids":["https://openalex.org/I59270414"]}]},{"author_position":"middle","author":{"id":null,"display_name":"S. K. Nandy","orcid":null},"institutions":[{"id":"https://openalex.org/I59270414","display_name":"Indian Institute of Science Bangalore","ror":"https://ror.org/04dese585","country_code":"IN","type":"education","lineage":["https://openalex.org/I59270414"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"S. K. Nandy","raw_affiliation_strings":["Indian Institute of Science, Bangalore, Karnataka, India"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Indian Institute of Science, Bangalore, Karnataka, India","institution_ids":["https://openalex.org/I59270414"]}]},{"author_position":"last","author":{"id":null,"display_name":"Ranjani Narayan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ranjani Narayan","raw_affiliation_strings":["Morphing Machines Pvt. LTd., Bengaluru, Karnataka, India"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Morphing Machines Pvt. LTd., Bengaluru, Karnataka, India","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.9003,"has_fulltext":false,"cited_by_count":14,"citation_normalized_percentile":{"value":0.8628288,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"29","issue":"8","first_page":"1707","last_page":"1720"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.20600000023841858,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.20600000023841858,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.16339999437332153,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11034","display_name":"Digital Filter Design and Implementation","score":0.15049999952316284,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/qr-decomposition","display_name":"QR decomposition","score":0.7157999873161316},{"id":"https://openalex.org/keywords/realization","display_name":"Realization (probability)","score":0.629800021648407},{"id":"https://openalex.org/keywords/factorization","display_name":"Factorization","score":0.5497999787330627},{"id":"https://openalex.org/keywords/linear-algebra","display_name":"Linear algebra","score":0.5289000272750854},{"id":"https://openalex.org/keywords/graphics","display_name":"Graphics","score":0.46000000834465027},{"id":"https://openalex.org/keywords/multi-core-processor","display_name":"Multi-core processor","score":0.45019999146461487},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.43299999833106995},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.40290001034736633},{"id":"https://openalex.org/keywords/acceleration","display_name":"Acceleration","score":0.3797999918460846},{"id":"https://openalex.org/keywords/task-parallelism","display_name":"Task parallelism","score":0.37860000133514404}],"concepts":[{"id":"https://openalex.org/C188060507","wikidata":"https://www.wikidata.org/wiki/Q653242","display_name":"QR decomposition","level":3,"score":0.7157999873161316},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6861000061035156},{"id":"https://openalex.org/C2781089630","wikidata":"https://www.wikidata.org/wiki/Q21856745","display_name":"Realization (probability)","level":2,"score":0.629800021648407},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6169000267982483},{"id":"https://openalex.org/C187834632","wikidata":"https://www.wikidata.org/wiki/Q188804","display_name":"Factorization","level":2,"score":0.5497999787330627},{"id":"https://openalex.org/C139352143","wikidata":"https://www.wikidata.org/wiki/Q82571","display_name":"Linear algebra","level":2,"score":0.5289000272750854},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.46000000834465027},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.45019999146461487},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.43299999833106995},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.421099990606308},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.40290001034736633},{"id":"https://openalex.org/C117896860","wikidata":"https://www.wikidata.org/wiki/Q11376","display_name":"Acceleration","level":2,"score":0.3797999918460846},{"id":"https://openalex.org/C42992933","wikidata":"https://www.wikidata.org/wiki/Q691169","display_name":"Task parallelism","level":3,"score":0.37860000133514404},{"id":"https://openalex.org/C2779851693","wikidata":"https://www.wikidata.org/wiki/Q183484","display_name":"Graphics processing unit","level":2,"score":0.36419999599456787},{"id":"https://openalex.org/C106515295","wikidata":"https://www.wikidata.org/wiki/Q26806595","display_name":"Parallel processing","level":2,"score":0.3425000011920929},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.33500000834465027},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.32429999113082886},{"id":"https://openalex.org/C2781172179","wikidata":"https://www.wikidata.org/wiki/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.32030001282691956},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3102000057697296},{"id":"https://openalex.org/C61483411","wikidata":"https://www.wikidata.org/wiki/Q3124522","display_name":"Data parallelism","level":3,"score":0.30000001192092896},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.2978000044822693},{"id":"https://openalex.org/C120373497","wikidata":"https://www.wikidata.org/wiki/Q1087987","display_name":"Parallel algorithm","level":2,"score":0.2944999933242798},{"id":"https://openalex.org/C42355184","wikidata":"https://www.wikidata.org/wiki/Q1361088","display_name":"Matrix decomposition","level":3,"score":0.2854999899864197},{"id":"https://openalex.org/C163834973","wikidata":"https://www.wikidata.org/wiki/Q2004891","display_name":"Numerical linear algebra","level":3,"score":0.28450000286102295},{"id":"https://openalex.org/C6802819","wikidata":"https://www.wikidata.org/wiki/Q1072174","display_name":"Linear system","level":2,"score":0.28380000591278076},{"id":"https://openalex.org/C35912277","wikidata":"https://www.wikidata.org/wiki/Q1243369","display_name":"Double-precision floating-point format","level":3,"score":0.27219998836517334},{"id":"https://openalex.org/C13164978","wikidata":"https://www.wikidata.org/wiki/Q600158","display_name":"Hardware acceleration","level":3,"score":0.27079999446868896},{"id":"https://openalex.org/C150741067","wikidata":"https://www.wikidata.org/wiki/Q2377218","display_name":"Systolic array","level":3,"score":0.26750001311302185},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.26669999957084656},{"id":"https://openalex.org/C114237110","wikidata":"https://www.wikidata.org/wiki/Q114901","display_name":"Gate array","level":3,"score":0.2653000056743622},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.26019999384880066},{"id":"https://openalex.org/C106516650","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm design","level":2,"score":0.258899986743927}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/tpds.2018.2803820","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2018.2803820","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:1612.04470","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1612.04470","pdf_url":"https://arxiv.org/pdf/1612.04470","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:dr.ntu.edu.sg:10356/142088","is_oa":false,"landing_page_url":"https://hdl.handle.net/10356/142088","pdf_url":null,"source":{"id":"https://openalex.org/S4306402609","display_name":"DR-NTU (Nanyang Technological University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I172675005","host_organization_name":"Nanyang Technological University","host_organization_lineage":["https://openalex.org/I172675005"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Journal Article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1612.04470","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1612.04470","pdf_url":"https://arxiv.org/pdf/1612.04470","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W1509188292","https://openalex.org/W1968238550","https://openalex.org/W1975322256","https://openalex.org/W1977514790","https://openalex.org/W1995323914","https://openalex.org/W2002257715","https://openalex.org/W2008020492","https://openalex.org/W2030640282","https://openalex.org/W2039778940","https://openalex.org/W2044433757","https://openalex.org/W2050343899","https://openalex.org/W2054855408","https://openalex.org/W2056882228","https://openalex.org/W2057636763","https://openalex.org/W2062013887","https://openalex.org/W2067706025","https://openalex.org/W2076027962","https://openalex.org/W2079942837","https://openalex.org/W2083246623","https://openalex.org/W2092590148","https://openalex.org/W2124433837","https://openalex.org/W2134993379","https://openalex.org/W2295685520","https://openalex.org/W2296571440","https://openalex.org/W2298971830","https://openalex.org/W2534273435","https://openalex.org/W2546085552","https://openalex.org/W4230289604","https://openalex.org/W6728766708","https://openalex.org/W6729748814","https://openalex.org/W6738901689"],"related_works":[],"abstract_inverted_index":{"QR":[0,24,164,215],"factorization":[1,25,165],"is":[2,63,75,110,120,150,253,287],"a":[3,78,181,233],"ubiquitous":[4],"operation":[5],"in":[6,85,106,114,133,167,222,244,247],"many":[7],"engineering":[8],"and":[9,53,57,136,145,189,212,227,264,284],"scientific":[10],"applications.":[11],"In":[12],"this":[13],"paper,":[14],"we":[15,30,217],"present":[16],"efficient":[17],"realization":[18,209,250],"of":[19,34,37,60,71,80,91,98,152,200,210,225,251,255],"Householder":[20],"Transform":[21],"(HT)":[22],"based":[23,163,214],"through":[26],"algorithm-architecture":[27],"co-design":[28],"where":[29,73,118],"achieve":[31],"performance":[32,158,196,260,268],"improvement":[33,261,269],"3-90x":[35],"in-terms":[36],"Gflops/watt":[38],"over":[39,262,276],"state-of-the-art":[40],"multicore,":[41],"General":[42,137,272],"Purpose":[43,138],"Graphics":[44,139],"Processing":[45,140],"Units":[46,141],"(GPGPUs),":[47],"Field":[48],"Programmable":[49],"Gate":[50],"Arrays":[51],"(FPGAs),":[52],"ClearSpeed":[54],"CSX700.":[55],"Theoretical":[56],"experimental":[58],"analysis":[59,97],"classical":[61,99,108,130,161,201],"HT":[62,109,116,144,162,202,211,226],"performed":[64],"for":[65,143,172,184,282],"opportunities":[66],"to":[67,103,160,242],"exhibit":[68],"higher":[69,127],"degree":[70],"parallelism":[72,74,128],"quantified":[76],"as":[77],"number":[79],"parallel":[81],"operations":[82,221],"per":[83],"level":[84],"the":[86,92,107,168,204,223,245,267],"Directed":[87],"Acyclic":[88],"Graph":[89],"(DAG)":[90],"transform.":[93],"Based":[94],"on":[95,180,203,232],"theoretical":[96],"HT,":[100],"an":[101],"opportunity":[102],"re-arrange":[104],"computations":[105,246],"identified":[111],"that":[112,122,148,191,229,240],"results":[113],"Modified":[115],"(MHT)":[117],"it":[119],"shown":[121],"MHT":[123,146,149,179,192,213,228,252],"exhibits":[124],"1.33x":[125],"times":[126],"than":[129,197,266],"HT.":[131],"Experiments":[132],"off-the-shelf":[134],"multicore":[135,263,283],"(GPGPUs)":[142],"suggest":[147],"capable":[151,254],"achieving":[153,256],"slightly":[154],"better":[155,195,259],"or":[156],"equal":[157],"compared":[159],"realizations":[166],"optimized":[169],"software":[170,280],"packages":[171,281],"Dense":[173,185],"Linear":[174,186],"Algebra":[175,187],"(DLA).":[176],"We":[177,237],"implement":[178],"customized":[182],"platform":[183],"(DLA)":[188],"show":[190],"achieves":[193],"1.3x":[194],"native":[198],"implementation":[199],"same":[205],"accelerator.":[206],"For":[207],"custom":[208,249],"factorization,":[216],"also":[218,238],"identify":[219],"macro":[220],"DAGs":[224],"are":[230],"realized":[231],"Reconfigurable":[234],"Data-path":[235],"(RDP).":[236],"observe":[239],"due":[241],"re-arrangement":[243],"MHT,":[248],"12":[257],"percent":[258],"GPGPUs":[265,285],"reported":[270],"by":[271],"Matrix":[273],"Multiplication":[274],"(GEMM)":[275],"highly":[277],"tuned":[278],"DLA":[279],"which":[286],"counter-intuitive.":[288]},"counts_by_year":[{"year":2024,"cited_by_count":3},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":3},{"year":2018,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2017-01-06T00:00:00"}
