{"id":"https://openalex.org/W4415250782","doi":"https://doi.org/10.1109/hpec67600.2025.11196657","title":"Generalized Methodology for Determining Numerical Features of Hardware Floating-Point Matrix Multipliers: Part I","display_name":"Generalized Methodology for Determining Numerical Features of Hardware Floating-Point Matrix Multipliers: Part I","publication_year":2025,"publication_date":"2025-09-15","ids":{"openalex":"https://openalex.org/W4415250782","doi":"https://doi.org/10.1109/hpec67600.2025.11196657"},"language":"en","primary_location":{"id":"doi:10.1109/hpec67600.2025.11196657","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec67600.2025.11196657","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5043265309","display_name":"Faizan A. Khattak","orcid":"https://orcid.org/0000-0003-2401-9366"},"institutions":[{"id":"https://openalex.org/I130828816","display_name":"University of Leeds","ror":"https://ror.org/024mrxd33","country_code":"GB","type":"education","lineage":["https://openalex.org/I130828816"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Faizan A. Khattak","raw_affiliation_strings":["University of Leeds,School of Computer Science,Leeds,UK"],"affiliations":[{"raw_affiliation_string":"University of Leeds,School of Computer Science,Leeds,UK","institution_ids":["https://openalex.org/I130828816"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5025035386","display_name":"Mantas Mikaitis","orcid":"https://orcid.org/0000-0001-8706-1436"},"institutions":[{"id":"https://openalex.org/I130828816","display_name":"University of Leeds","ror":"https://ror.org/024mrxd33","country_code":"GB","type":"education","lineage":["https://openalex.org/I130828816"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Mantas Mikaitis","raw_affiliation_strings":["University of Leeds,School of Computer Science,Leeds,UK"],"affiliations":[{"raw_affiliation_string":"University of Leeds,School of Computer Science,Leeds,UK","institution_ids":["https://openalex.org/I130828816"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5043265309"],"corresponding_institution_ids":["https://openalex.org/I130828816"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.3212315,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10792","display_name":"Matrix Theory and Algorithms","score":0.9629999995231628,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10792","display_name":"Matrix Theory and Algorithms","score":0.9629999995231628,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13983","display_name":"Cybersecurity and Information Systems","score":0.9519000053405762,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12898","display_name":"Induction Heating and Inverter Technology","score":0.9225000143051147,"subfield":{"id":"https://openalex.org/subfields/2210","display_name":"Mechanical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/double-precision-floating-point-format","display_name":"Double-precision floating-point format","score":0.5586000084877014},{"id":"https://openalex.org/keywords/graphics","display_name":"Graphics","score":0.5515000224113464},{"id":"https://openalex.org/keywords/floating-point","display_name":"Floating point","score":0.5271999835968018},{"id":"https://openalex.org/keywords/scheme","display_name":"Scheme (mathematics)","score":0.49630001187324524},{"id":"https://openalex.org/keywords/multiplier","display_name":"Multiplier (economics)","score":0.49570000171661377},{"id":"https://openalex.org/keywords/matrix","display_name":"Matrix (chemical analysis)","score":0.47510001063346863},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.43939998745918274},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4108000099658966}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7307999730110168},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.6128000020980835},{"id":"https://openalex.org/C35912277","wikidata":"https://www.wikidata.org/wiki/Q1243369","display_name":"Double-precision floating-point format","level":3,"score":0.5586000084877014},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.5515000224113464},{"id":"https://openalex.org/C84211073","wikidata":"https://www.wikidata.org/wiki/Q117879","display_name":"Floating point","level":2,"score":0.5271999835968018},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.49630001187324524},{"id":"https://openalex.org/C124584101","wikidata":"https://www.wikidata.org/wiki/Q1053266","display_name":"Multiplier (economics)","level":2,"score":0.49570000171661377},{"id":"https://openalex.org/C106487976","wikidata":"https://www.wikidata.org/wiki/Q685816","display_name":"Matrix (chemical analysis)","level":2,"score":0.47510001063346863},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.4456000030040741},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.43939998745918274},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4291999936103821},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.42640000581741333},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4108000099658966},{"id":"https://openalex.org/C133095886","wikidata":"https://www.wikidata.org/wiki/Q1307173","display_name":"Single-precision floating-point format","level":3,"score":0.38609999418258667},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.36390000581741333},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.35269999504089355},{"id":"https://openalex.org/C16910744","wikidata":"https://www.wikidata.org/wiki/Q7705759","display_name":"Test data","level":2,"score":0.3463999927043915},{"id":"https://openalex.org/C17349429","wikidata":"https://www.wikidata.org/wiki/Q1049914","display_name":"Matrix multiplication","level":3,"score":0.34049999713897705},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.32760000228881836},{"id":"https://openalex.org/C48753275","wikidata":"https://www.wikidata.org/wiki/Q11216","display_name":"Numerical analysis","level":2,"score":0.3215999901294708},{"id":"https://openalex.org/C18945957","wikidata":"https://www.wikidata.org/wiki/Q5597193","display_name":"Graphics hardware","level":3,"score":0.3172999918460846},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.3151000142097473},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.31290000677108765},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.28630000352859497},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.2728999853134155},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.2687999904155731},{"id":"https://openalex.org/C138958017","wikidata":"https://www.wikidata.org/wiki/Q190087","display_name":"Data type","level":2,"score":0.2630000114440918}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpec67600.2025.11196657","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec67600.2025.11196657","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":6,"referenced_works":["https://openalex.org/W2981323010","https://openalex.org/W3009762362","https://openalex.org/W3021029305","https://openalex.org/W4392309059","https://openalex.org/W4403211919","https://openalex.org/W4411115393"],"related_works":[],"abstract_inverted_index":{"Numerical":[0],"features":[1,113,143],"of":[2,26,30,89,114,144,153,173],"matrix":[3,115,174],"multiplier":[4],"hardware":[5],"units":[6],"in":[7],"NVIDIA":[8,45,178],"and":[9,23,56,102,110,120,126,128,183,187],"AMD":[10],"data":[11,157],"centre":[12,158],"GPUs":[13,46],"have":[14,93],"recently":[15],"been":[16],"studied.":[17],"Features":[18],"such":[19],"as":[20],"rounding,":[21],"normalisation,":[22],"internal":[24],"precision":[25,58],"the":[27,37,63,95,98,141,154,193],"accumulators":[28],"are":[29,80,149],"interest.":[31],"In":[32],"this":[33],"paper,":[34],"we":[35],"extend":[36],"methodology":[38,135],"for":[39,53,117,170],"analysing":[40],"those":[41,152],"features,":[42],"to":[43,85,97,138,151,166],"consumer-grade":[44,147],"by":[47],"implementing":[48],"an":[49,71],"architecture-independent":[50],"test":[51,65],"scheme":[52,96],"various":[54],"input":[55,122],"output":[57,132],"formats.":[59,91,133,197],"Unlike":[60],"current":[61],"approaches,":[62],"proposed":[64],"vector":[66],"generation":[67],"method":[68],"neither":[69],"performs":[70],"exhaustive":[72],"search":[73],"nor":[74],"relies":[75],"on":[76,176],"hard-coded":[77],"constants":[78],"that":[79,140],"device-specific,":[81],"yet":[82],"remains":[83],"applicable":[84],"a":[86,146,156],"wide":[87],"range":[88],"mixed-precision":[90],"We":[92,160],"applied":[94],"RTX-3060":[99],"(Ampere":[100],"architecture),":[101],"Ada":[103],"RTX-1000":[104],"(Ada":[105],"Lovelace":[106],"architecture)":[107],"graphics":[108],"cards":[109],"determined":[111],"numerical":[112,142],"multipliers":[116,175],"binary16,":[118],"TensorFloat32,":[119],"bfloat16":[121],"floating":[123],"point":[124],"formats":[125],"binary16":[127],"binary32":[129],"IEEE":[130],"754":[131],"Our":[134],"allowed":[136],"us":[137],"determine":[139],"RTX-3060,":[145],"GPU,":[148],"identical":[150],"A100,":[155],"GPU.":[159],"do":[161],"not":[162],"expect":[163],"our":[164],"code":[165],"require":[167],"any":[168,188],"changes":[169],"performing":[171],"analysis":[172],"newer":[177],"GPUs,":[179],"Hopper":[180],"or":[181],"Blackwell,":[182],"their":[184],"future":[185],"successors,":[186],"input/output":[189],"format":[190],"combination,":[191],"including":[192],"latest":[194],"8-bit":[195],"floating-point":[196]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-16T00:00:00"}
