{"id":"https://openalex.org/W3033129087","doi":"https://doi.org/10.1145/3394450.3397468","title":"On the challenges in programming mixed-precision deep neural networks","display_name":"On the challenges in programming mixed-precision deep neural networks","publication_year":2020,"publication_date":"2020-06-01","ids":{"openalex":"https://openalex.org/W3033129087","doi":"https://doi.org/10.1145/3394450.3397468","mag":"3033129087"},"language":"en","primary_location":{"id":"doi:10.1145/3394450.3397468","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3394450.3397468","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 4th ACM SIGPLAN International Workshop on Machine Learning and Programming Languages","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103214318","display_name":"Rongxuan Zhao","orcid":"https://orcid.org/0009-0007-0875-5298"},"institutions":[{"id":"https://openalex.org/I47508984","display_name":"Imperial College London","ror":"https://ror.org/041kmwe10","country_code":"GB","type":"education","lineage":["https://openalex.org/I47508984"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Ruizhe Zhao","raw_affiliation_strings":["Imperial College London, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Imperial College London, UK","institution_ids":["https://openalex.org/I47508984"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057940557","display_name":"Wayne Luk","orcid":"https://orcid.org/0000-0002-6750-927X"},"institutions":[{"id":"https://openalex.org/I47508984","display_name":"Imperial College London","ror":"https://ror.org/041kmwe10","country_code":"GB","type":"education","lineage":["https://openalex.org/I47508984"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Wayne Luk","raw_affiliation_strings":["Imperial College London, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Imperial College London, UK","institution_ids":["https://openalex.org/I47508984"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102741305","display_name":"Chao Xiong","orcid":"https://orcid.org/0000-0003-1325-4192"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chao Xiong","raw_affiliation_strings":["Corerain Technologies, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Corerain Technologies, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103060695","display_name":"Xinyu Niu","orcid":"https://orcid.org/0000-0003-0202-9408"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xinyu Niu","raw_affiliation_strings":["Corerain Technologies, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Corerain Technologies, China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5009397115","display_name":"Kuen Hung Tsoi","orcid":"https://orcid.org/0000-0002-6856-6727"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kuen Hung Tsoi","raw_affiliation_strings":["Corerain Technologies, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Corerain Technologies, China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0979,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.383303,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"20","last_page":"28"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.9934999942779541,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8584305047988892},{"id":"https://openalex.org/keywords/implementation","display_name":"Implementation","score":0.6592965126037598},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5745654106140137},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.5043164491653442},{"id":"https://openalex.org/keywords/data-type","display_name":"Data type","score":0.47436437010765076},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.43683528900146484},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.4330177307128906},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.43265557289123535},{"id":"https://openalex.org/keywords/programming-paradigm","display_name":"Programming paradigm","score":0.4225495457649231},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.4179448187351227},{"id":"https://openalex.org/keywords/deep-neural-networks","display_name":"Deep neural networks","score":0.4175734519958496},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.36415398120880127},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.36086970567703247},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.200546532869339},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.1586107611656189}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8584305047988892},{"id":"https://openalex.org/C26713055","wikidata":"https://www.wikidata.org/wiki/Q245962","display_name":"Implementation","level":2,"score":0.6592965126037598},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5745654106140137},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.5043164491653442},{"id":"https://openalex.org/C138958017","wikidata":"https://www.wikidata.org/wiki/Q190087","display_name":"Data type","level":2,"score":0.47436437010765076},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.43683528900146484},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.4330177307128906},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.43265557289123535},{"id":"https://openalex.org/C34165917","wikidata":"https://www.wikidata.org/wiki/Q188267","display_name":"Programming paradigm","level":2,"score":0.4225495457649231},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.4179448187351227},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.4175734519958496},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.36415398120880127},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.36086970567703247},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.200546532869339},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.1586107611656189},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.0},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3394450.3397468","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3394450.3397468","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 4th ACM SIGPLAN International Workshop on Machine Learning and Programming Languages","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4587427570","display_name":null,"funder_award_id":"EP/S030069/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G600182179","display_name":null,"funder_award_id":"EP/L00058X/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G7493804148","display_name":null,"funder_award_id":"EP/N031768/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G774180880","display_name":null,"funder_award_id":"EP/P010040/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"}],"funders":[{"id":"https://openalex.org/F4320334627","display_name":"Engineering and Physical Sciences Research Council","ror":"https://ror.org/0439y7842"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":23,"referenced_works":["https://openalex.org/W639708223","https://openalex.org/W1901129140","https://openalex.org/W2000050212","https://openalex.org/W2013156670","https://openalex.org/W2099404643","https://openalex.org/W2111394443","https://openalex.org/W2194775991","https://openalex.org/W2271840356","https://openalex.org/W2274287116","https://openalex.org/W2302255633","https://openalex.org/W2412782625","https://openalex.org/W2565639579","https://openalex.org/W2621550233","https://openalex.org/W2758487246","https://openalex.org/W2787513823","https://openalex.org/W2796347433","https://openalex.org/W2798341898","https://openalex.org/W2946955515","https://openalex.org/W2963125769","https://openalex.org/W2964299589","https://openalex.org/W2964350391","https://openalex.org/W2982041622","https://openalex.org/W3016220765"],"related_works":["https://openalex.org/W2120447654","https://openalex.org/W2977179488","https://openalex.org/W2144453115","https://openalex.org/W2128223750","https://openalex.org/W4238532390","https://openalex.org/W2188872161","https://openalex.org/W2961779879","https://openalex.org/W1571189856","https://openalex.org/W1872937274","https://openalex.org/W4285818394"],"abstract_inverted_index":{"Deep":[0,51],"Neural":[1],"Networks":[2],"(DNNs)":[3],"are":[4,43],"resilient":[5],"to":[6,31,58],"reduced":[7],"data":[8,14,65,153],"precision,":[9],"which":[10,75,160],"motivates":[11],"exploiting":[12],"low-precision":[13,26],"formats":[15,42],"for":[16],"more":[17],"efficient":[18,152],"computation,":[19],"especially":[20],"on":[21,47,138,170],"custom":[22],"hardware":[23],"accelerators.":[24],"Multiple":[25],"types":[27,66],"can":[28,149],"be":[29],"mixed":[30],"fit":[32],"the":[33,99,163],"dynamic":[34],"range":[35],"of":[36],"different":[37],"DNN":[38,114],"layers.":[39],"However,":[40],"these":[41,126],"not":[44],"often":[45],"supported":[46],"popular":[48],"microprocessors":[49],"and":[50,61,67,78,95,102,111,136,166],"Learning":[52],"(DL)":[53],"frameworks,":[54],"hence":[55],"we":[56,131],"have":[57],"manually":[59],"implement":[60],"optimize":[62],"such":[63],"novel":[64],"integrate":[68],"them":[69],"with":[70],"multiple":[71],"DL":[72,142],"framework":[73,123],"components,":[74],"is":[76],"tedious":[77],"error-prone.":[79],"This":[80],"paper":[81],"first":[82],"reviews":[83],"three":[84],"major":[85],"challenges":[86],"in":[87],"programming":[88],"mixed-precision":[89,113,171],"DNNs,":[90],"including":[91],"generating":[92],"high-performance":[93],"arithmetic":[94],"typecast":[96],"functions,":[97],"reducing":[98],"recompilation":[100],"time":[101],"bloated":[103],"binary":[104],"size":[105],"caused":[106],"by":[107],"excessive":[108],"template":[109],"specialization,":[110],"optimizing":[112],"computational":[115],"graphs.":[116],"We":[117],"present":[118,132],"our":[119,133,139],"approach,":[120],"Lowgen,":[121],"a":[122],"that":[124,147,156],"addresses":[125],"challenges.":[127],"For":[128],"each":[129],"challenge,":[130],"solution":[134],"implemented":[135],"tested":[137],"in-house,":[140],"TensorFlow-like":[141],"framework.":[143],"Empirical":[144],"evaluation":[145],"shows":[146],"Lowgen":[148],"automatically":[150],"generate":[151],"type":[154],"implementations":[155],"enable":[157],"significant":[158],"speed-up,":[159],"greatly":[161],"lowers":[162],"development":[164],"effort":[165],"enhances":[167],"research":[168],"productivity":[169],"DNN.":[172]},"counts_by_year":[{"year":2021,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
