{"id":"https://openalex.org/W4391326594","doi":"https://doi.org/10.1109/tcsii.2024.3359678","title":"A Low-Cost Floating-Point FMA Unit Supporting Package Operations for HPC-AI Applications","display_name":"A Low-Cost Floating-Point FMA Unit Supporting Package Operations for HPC-AI Applications","publication_year":2024,"publication_date":"2024-01-29","ids":{"openalex":"https://openalex.org/W4391326594","doi":"https://doi.org/10.1109/tcsii.2024.3359678"},"language":"en","primary_location":{"id":"doi:10.1109/tcsii.2024.3359678","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsii.2024.3359678","pdf_url":null,"source":{"id":"https://openalex.org/S93916849","display_name":"IEEE Transactions on Circuits & Systems II Express Briefs","issn_l":"1549-7747","issn":["1549-7747","1558-3791"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems II: Express Briefs","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5019476529","display_name":"Hongbing Tan","orcid":"https://orcid.org/0000-0003-4184-4173"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Hongbing Tan","raw_affiliation_strings":["College of Computer Science and Technology, National University of Defense Technology, Changsha, China"],"raw_orcid":"https://orcid.org/0000-0003-4184-4173","affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101985792","display_name":"Jing Zhang","orcid":"https://orcid.org/0009-0002-7087-2528"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jing Zhang","raw_affiliation_strings":["College of Computer Science and Technology, National University of Defense Technology, Changsha, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100951697","display_name":"Xiaowei He","orcid":null},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaowei He","raw_affiliation_strings":["College of Computer Science and Technology, National University of Defense Technology, Changsha, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044840341","display_name":"Libo Huang","orcid":"https://orcid.org/0000-0001-7878-3998"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Libo Huang","raw_affiliation_strings":["College of Computer Science and Technology, National University of Defense Technology, Changsha, China"],"raw_orcid":"https://orcid.org/0000-0001-7878-3998","affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070632746","display_name":"Yongwen Wang","orcid":"https://orcid.org/0009-0008-2514-2052"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yongwen Wang","raw_affiliation_strings":["College of Computer Science and Technology, National University of Defense Technology, Changsha, China"],"raw_orcid":"https://orcid.org/0009-0008-2514-2052","affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100648910","display_name":"Liquan Xiao","orcid":"https://orcid.org/0000-0002-3285-2625"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Liquan Xiao","raw_affiliation_strings":["College of Computer Science and Technology, National University of Defense Technology, Changsha, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5019476529"],"corresponding_institution_ids":["https://openalex.org/I170215575"],"apc_list":null,"apc_paid":null,"fwci":1.0371,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.76293304,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":"71","issue":"7","first_page":"3488","last_page":"3492"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11697","display_name":"Numerical Methods and Algorithms","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11697","display_name":"Numerical Methods and Algorithms","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10363","display_name":"Low-power high-performance VLSI design","score":0.9904000163078308,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.780550479888916},{"id":"https://openalex.org/keywords/floating-point","display_name":"Floating point","score":0.6531065702438354},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.5491079688072205},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.5338481664657593},{"id":"https://openalex.org/keywords/floating-point-unit","display_name":"Floating-point unit","score":0.4719606637954712},{"id":"https://openalex.org/keywords/simd","display_name":"SIMD","score":0.45335710048675537},{"id":"https://openalex.org/keywords/performance-improvement","display_name":"Performance improvement","score":0.43114209175109863},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.3714785873889923},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.36128729581832886},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.22221943736076355},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.09120297431945801}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.780550479888916},{"id":"https://openalex.org/C84211073","wikidata":"https://www.wikidata.org/wiki/Q117879","display_name":"Floating point","level":2,"score":0.6531065702438354},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.5491079688072205},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.5338481664657593},{"id":"https://openalex.org/C110305270","wikidata":"https://www.wikidata.org/wiki/Q733507","display_name":"Floating-point unit","level":3,"score":0.4719606637954712},{"id":"https://openalex.org/C150552126","wikidata":"https://www.wikidata.org/wiki/Q339387","display_name":"SIMD","level":2,"score":0.45335710048675537},{"id":"https://openalex.org/C2778915421","wikidata":"https://www.wikidata.org/wiki/Q3643177","display_name":"Performance improvement","level":2,"score":0.43114209175109863},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3714785873889923},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.36128729581832886},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.22221943736076355},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.09120297431945801},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcsii.2024.3359678","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsii.2024.3359678","pdf_url":null,"source":{"id":"https://openalex.org/S93916849","display_name":"IEEE Transactions on Circuits & Systems II Express Briefs","issn_l":"1549-7747","issn":["1549-7747","1558-3791"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems II: Express Briefs","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4459438753","display_name":null,"funder_award_id":"62272475","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6221703305","display_name":null,"funder_award_id":"62090023","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G947803998","display_name":null,"funder_award_id":"2022JJ10064","funder_id":"https://openalex.org/F4320322866","funder_display_name":"Natural Science Foundation of Hainan Province"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322866","display_name":"Natural Science Foundation of Hainan Province","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W1977146902","https://openalex.org/W2026445983","https://openalex.org/W2082444966","https://openalex.org/W2142464516","https://openalex.org/W2147479230","https://openalex.org/W2207050309","https://openalex.org/W2224617858","https://openalex.org/W2472089204","https://openalex.org/W2763421725","https://openalex.org/W2895305554","https://openalex.org/W2914973130","https://openalex.org/W2951607363","https://openalex.org/W3011858437","https://openalex.org/W3134495297","https://openalex.org/W3197695599","https://openalex.org/W3217237164","https://openalex.org/W4220769636","https://openalex.org/W4288346545","https://openalex.org/W4312872667","https://openalex.org/W6720651388","https://openalex.org/W6745245109","https://openalex.org/W6763653508"],"related_works":["https://openalex.org/W4379115909","https://openalex.org/W2541658314","https://openalex.org/W579454177","https://openalex.org/W2797902698","https://openalex.org/W3215589575","https://openalex.org/W2565725308","https://openalex.org/W2908265705","https://openalex.org/W1482601373","https://openalex.org/W2742555907","https://openalex.org/W2121332089"],"abstract_inverted_index":{"The":[0,105],"convergence":[1],"of":[2,11,38,43,137],"HPC":[3],"and":[4,67,77,101,111,119,140],"AI":[5],"has":[6],"brought":[7],"about":[8],"a":[9,27,40,123,134],"diversification":[10],"precision,":[12],"posing":[13],"significant":[14],"hardware":[15,112],"implementation":[16],"challenges.":[17],"This":[18],"paper":[19],"aims":[20],"to":[21,97,115,122,155],"address":[22],"this":[23,50],"issue":[24],"by":[25],"presenting":[26],"low-cost":[28],"floating-point":[29],"(FP)":[30],"fused":[31],"multiply-add":[32],"(FMA)":[33],"unit":[34,53,108,162],"that":[35,90],"is":[36],"capable":[37],"supporting":[39],"wide":[41],"range":[42,136],"FP":[44,138],"formats.":[45,69],"For":[46,70],"the":[47,71,75,128,159],"fewer-than-64-bit":[48],"formats,":[49],"innovative":[51],"FMA":[52,76,107,126,161],"performs":[54],"standard":[55],"or":[56,82],"mixed-precision":[57],"operations":[58,89],"fully":[59],"pipelined":[60],"in":[61,93],"parallel":[62],"for":[63],"SP,":[64],"TF32,":[65],"BF16,":[66],"HP":[68],"64-bit":[72],"DP":[73,125],"format,":[74],"ADD":[78],"operations,":[79],"whether":[80],"independent":[81],"data-related,":[83],"can":[84,151],"be":[85],"organized":[86],"into":[87],"package":[88],"are":[91],"executed":[92],"two":[94],"consecutive":[95],"cycles":[96],"eliminate":[98],"pipeline":[99],"stall":[100],"then":[102],"improve":[103,152],"performance.":[104,120],"proposed":[106,129],"utilizes":[109],"iteration":[110],"vectorization":[113],"methods":[114],"balance":[116],"between":[117],"cost":[118],"Compared":[121],"conventional":[124],"unit,":[127],"design":[130],"not":[131],"only":[132],"supports":[133],"wider":[135],"formats":[139],"functions":[141],"but":[142],"also":[143],"achieves":[144],"higher":[145],"performance":[146,153],"with":[147],"less":[148],"cost.":[149],"It":[150],"up":[154],"1.5x":[156],"more":[157],"than":[158],"dual-mode":[160],"when":[163],"performing":[164],"HPC-AI":[165],"applications.":[166]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
