{"id":"https://openalex.org/W4388430727","doi":"https://doi.org/10.1109/tcad.2023.3329778","title":"Mortar-FP8: Morphing the Existing FP32 Infrastructure for High-Performance Deep Learning Acceleration","display_name":"Mortar-FP8: Morphing the Existing FP32 Infrastructure for High-Performance Deep Learning Acceleration","publication_year":2023,"publication_date":"2023-11-06","ids":{"openalex":"https://openalex.org/W4388430727","doi":"https://doi.org/10.1109/tcad.2023.3329778"},"language":"en","primary_location":{"id":"doi:10.1109/tcad.2023.3329778","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcad.2023.3329778","pdf_url":null,"source":{"id":"https://openalex.org/S100835903","display_name":"IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems","issn_l":"0278-0070","issn":["0278-0070","1937-4151"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100455572","display_name":"Hongyan Li","orcid":"https://orcid.org/0000-0001-8819-904X"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Hongyan Li","raw_affiliation_strings":["State Key Laboratory of Computer Architecture, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Computer Architecture, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014244388","display_name":"Hang L\u00fc","orcid":"https://orcid.org/0000-0001-6233-3538"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]},{"id":"https://openalex.org/I4391767888","display_name":"State Key Laboratory of Computer Architecture","ror":"https://ror.org/02pq9w205","country_code":null,"type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176","https://openalex.org/I4391767888"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hang Lu","raw_affiliation_strings":["State Key Laboratory of Computer Architecture, Institute of Computing Technology, the Zhongguancun Laboratory, and the Shanghai Innovation Center for Processor Technologies, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Computer Architecture, Institute of Computing Technology, the Zhongguancun Laboratory, and the Shanghai Innovation Center for Processor Technologies, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366","https://openalex.org/I4391767888"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5023380073","display_name":"Xiaowei Li","orcid":"https://orcid.org/0000-0002-0874-814X"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaowei Li","raw_affiliation_strings":["State Key Laboratory of Computer Architecture, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Computer Architecture, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5100455572"],"corresponding_institution_ids":["https://openalex.org/I19820366","https://openalex.org/I4210090176"],"apc_list":null,"apc_paid":null,"fwci":0.3611,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.60928183,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":"43","issue":"3","first_page":"878","last_page":"891"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13114","display_name":"Image Processing Techniques and Applications","score":0.9932000041007996,"subfield":{"id":"https://openalex.org/subfields/2214","display_name":"Media Technology"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11105","display_name":"Advanced Image Processing Techniques","score":0.9901000261306763,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7250275015830994},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.6180551052093506},{"id":"https://openalex.org/keywords/mortar","display_name":"Mortar","score":0.6037899255752563},{"id":"https://openalex.org/keywords/hardware-acceleration","display_name":"Hardware acceleration","score":0.5474874377250671},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4663293659687042},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4570280909538269},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.45038819313049316},{"id":"https://openalex.org/keywords/acceleration","display_name":"Acceleration","score":0.44103553891181946},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.4151042103767395},{"id":"https://openalex.org/keywords/materials-science","display_name":"Materials science","score":0.13175281882286072}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7250275015830994},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.6180551052093506},{"id":"https://openalex.org/C130767629","wikidata":"https://www.wikidata.org/wiki/Q7905205","display_name":"Mortar","level":2,"score":0.6037899255752563},{"id":"https://openalex.org/C13164978","wikidata":"https://www.wikidata.org/wiki/Q600158","display_name":"Hardware acceleration","level":3,"score":0.5474874377250671},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4663293659687042},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4570280909538269},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.45038819313049316},{"id":"https://openalex.org/C117896860","wikidata":"https://www.wikidata.org/wiki/Q11376","display_name":"Acceleration","level":2,"score":0.44103553891181946},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.4151042103767395},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.13175281882286072},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C74650414","wikidata":"https://www.wikidata.org/wiki/Q11397","display_name":"Classical mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcad.2023.3329778","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcad.2023.3329778","pdf_url":null,"source":{"id":"https://openalex.org/S100835903","display_name":"IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems","issn_l":"0278-0070","issn":["0278-0070","1937-4151"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6299999952316284,"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9"}],"awards":[{"id":"https://openalex.org/G5185755230","display_name":null,"funder_award_id":"2021098","funder_id":"https://openalex.org/F4320321133","funder_display_name":"Chinese Academy of Sciences"},{"id":"https://openalex.org/G7874008062","display_name":null,"funder_award_id":"62172387","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321133","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":41,"referenced_works":["https://openalex.org/W1191365092","https://openalex.org/W1791560514","https://openalex.org/W1861492603","https://openalex.org/W2108598243","https://openalex.org/W2194775991","https://openalex.org/W2291160084","https://openalex.org/W2300242332","https://openalex.org/W2541839172","https://openalex.org/W2549139847","https://openalex.org/W2607041014","https://openalex.org/W2769654144","https://openalex.org/W2798729263","https://openalex.org/W2949870694","https://openalex.org/W2953212265","https://openalex.org/W2962874694","https://openalex.org/W2963446712","https://openalex.org/W2982770724","https://openalex.org/W3014447010","https://openalex.org/W3094502228","https://openalex.org/W3102015846","https://openalex.org/W3104962140","https://openalex.org/W3133588229","https://openalex.org/W3183406752","https://openalex.org/W3203082935","https://openalex.org/W3207265322","https://openalex.org/W4221101426","https://openalex.org/W4247198796","https://openalex.org/W4293584584","https://openalex.org/W4297743978","https://openalex.org/W4297812065","https://openalex.org/W4313332258","https://openalex.org/W4318685740","https://openalex.org/W6639703010","https://openalex.org/W6695314431","https://openalex.org/W6696798448","https://openalex.org/W6746698991","https://openalex.org/W6762484958","https://openalex.org/W6767032739","https://openalex.org/W6771680064","https://openalex.org/W6789836147","https://openalex.org/W6843734122"],"related_works":["https://openalex.org/W2146872326","https://openalex.org/W4319952061","https://openalex.org/W4280636456","https://openalex.org/W3103034165","https://openalex.org/W4310584535","https://openalex.org/W3154092384","https://openalex.org/W4295935044","https://openalex.org/W4307927141","https://openalex.org/W3159906349","https://openalex.org/W2625222559"],"abstract_inverted_index":{"Vanilla":[0],"deep":[1,95,111],"neural":[2],"networks":[3],"(DNNs)":[4],"after":[5],"training":[6],"are":[7,198,206],"represented":[8],"with":[9,140,156],"native":[10],"floating-point":[11],"32":[12],"(fp32)":[13],"weights.":[14],"We":[15,103],"observe":[16],"that":[17],"the":[18,28,31,56,71,76,105,150,165],"bit-level":[19],"sparsity":[20,129],"of":[21,33,161,196],"these":[22],"weights":[23,81,152],"is":[24,35],"very":[25],"abundant":[26],"in":[27],"mantissa":[29,77,128],"and":[30,54,59,78,100,122,163,182,194,201,209],"distribution":[32],"exponent":[34],"aggregated,":[36],"which":[37],"can":[38],"all":[39],"be":[40],"directly":[41],"exploited":[42],"to":[43,74,82,92,131,153,174],"speed":[44],"up":[45,130,173],"model":[46],"inference.":[47],"In":[48],"this":[49],"article,":[50],"we":[51],"propose":[52],"Mortar":[53,126,197],"Mortar-FP8,":[55],"offline/online":[57],"software":[58,72],"hardware":[60,89,167],"collaborative":[61],"approaches":[62],"for":[63,212],"fp32":[64,80,151],"DNN":[65],"acceleration.":[66],"The":[67,192],"proposed":[68],"methods":[69],"include":[70],"algorithms":[73],"morph":[75,149],"convert":[79],"fp8":[83,154],"format,":[84],"as":[85,87],"well":[86],"associated":[88],"accelerator":[90,168],"architecture":[91],"accelerate":[93],"general-purpose":[94],"learning":[96,112],"through":[97],"optimized":[98],"algorithm":[99],"specialized":[101],"hardware.":[102],"highlight":[104],"following":[106],"results":[107],"by":[108],"evaluating":[109],"various":[110],"tasks,":[113],"including":[114],"image":[115,123],"classification,":[116],"object":[117],"detection,":[118],"video":[119],"understanding,":[120],"video,":[121],"super-resolution:":[124],"1)":[125],"increase":[127],"<inline-formula":[132,175,183],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[133,176,184],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">":[134,177,185],"<tex-math":[135,178,186],"notation=\"LaTeX\">$1.58\\times":[136],"-.09\\times":[137],"$":[138,180,188],"</tex-math></inline-formula>":[139,181,189],"only":[141],"a":[142,157],"negligible":[143],"~0.2%":[144],"accuracy":[145,159],"loss;":[146],"2)":[147],"Mortar-FP8":[148],"format":[155],"minimal":[158],"loss":[160],"~0.3%;":[162],"3)":[164],"corresponding":[166],"significantly":[169],"outperforms":[170],"baselines,":[171],"achieving":[172],"notation=\"LaTeX\">$6.032\\times":[179],"notation=\"LaTeX\">$6.99\\times":[187],"performance":[190],"improvements.":[191],"area":[193],"power":[195],"0.031":[199],"mm2":[200,208],"68.58":[202],"mW.":[203],"Those":[204],"metrics":[205],"0.0505":[207],"25.16":[210],"mW":[211],"Mortar-FP8.":[213]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
