{"id":"https://openalex.org/W4225139319","doi":"https://doi.org/10.48550/arxiv.2204.13666","title":"Schr\u00f6dinger's FP: Dynamic Adaptation of Floating-Point Containers for Deep Learning Training","display_name":"Schr\u00f6dinger's FP: Dynamic Adaptation of Floating-Point Containers for Deep Learning Training","publication_year":2022,"publication_date":"2022-04-28","ids":{"openalex":"https://openalex.org/W4225139319","doi":"https://doi.org/10.48550/arxiv.2204.13666"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2204.13666","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2204.13666","pdf_url":"https://arxiv.org/pdf/2204.13666","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2204.13666","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5091148470","display_name":"Milo\u0161 Nikoli\u0107","orcid":"https://orcid.org/0000-0001-5892-8248"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Nikoli\u0107, Milo\u0161","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014972684","display_name":"Enrique Torres Sanchez","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sanchez, Enrique Torres","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100446919","display_name":"Jiahui Wang","orcid":"https://orcid.org/0009-0006-1601-8389"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Jiahui","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045620647","display_name":"Ali Hadi Zadeh","orcid":"https://orcid.org/0000-0001-5823-2494"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zadeh, Ali Hadi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109929462","display_name":"Mostafa A. A. Mahmoud","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mahmoud, Mostafa","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005226229","display_name":"Ameer Abdelhadi","orcid":"https://orcid.org/0000-0003-4683-8901"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Abdelhadi, Ameer","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Ibrahim, Kareem","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ibrahim, Kareem","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Moshovos, Andreas","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Moshovos, Andreas","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5091148470"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.9883999824523926,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9843999743461609,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6415432095527649},{"id":"https://openalex.org/keywords/memory-footprint","display_name":"Memory footprint","score":0.6290663480758667},{"id":"https://openalex.org/keywords/floating-point","display_name":"Floating point","score":0.4647113084793091},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.4311993718147278},{"id":"https://openalex.org/keywords/static-random-access-memory","display_name":"Static random-access memory","score":0.4244392514228821},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.3468104600906372},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.2324722707271576}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6415432095527649},{"id":"https://openalex.org/C74912251","wikidata":"https://www.wikidata.org/wiki/Q6815727","display_name":"Memory footprint","level":2,"score":0.6290663480758667},{"id":"https://openalex.org/C84211073","wikidata":"https://www.wikidata.org/wiki/Q117879","display_name":"Floating point","level":2,"score":0.4647113084793091},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4311993718147278},{"id":"https://openalex.org/C68043766","wikidata":"https://www.wikidata.org/wiki/Q267416","display_name":"Static random-access memory","level":2,"score":0.4244392514228821},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3468104600906372},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.2324722707271576},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2204.13666","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2204.13666","pdf_url":"https://arxiv.org/pdf/2204.13666","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2204.13666","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2204.13666","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2204.13666","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2204.13666","pdf_url":"https://arxiv.org/pdf/2204.13666","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"score":0.9100000262260437,"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W3151633427","https://openalex.org/W2212894501","https://openalex.org/W2793465010","https://openalex.org/W3024050170","https://openalex.org/W2109451123","https://openalex.org/W2051487156","https://openalex.org/W4289729660","https://openalex.org/W2887023857","https://openalex.org/W2950000202","https://openalex.org/W4321472478"],"abstract_inverted_index":{"The":[0,90],"transfer":[1],"of":[2,58,95,111],"tensors":[3,160],"from/to":[4],"memory":[5],"during":[6,67,194],"neural":[7],"network":[8],"training":[9,195],"dominates":[10],"time":[11],"and":[12,18,56,65,83,93,97,118,128,148,169,199,242],"energy.":[13],"To":[14],"improve":[15,237],"energy":[16],"efficiency":[17],"performance,":[19],"research":[20],"has":[21],"been":[22],"exploring":[23],"ways":[24],"to":[25,38,77,101,113,143,196,216,224,240],"use":[26,162],"narrower":[27],"data":[28],"representations.":[29],"So":[30],"far,":[31],"these":[32],"attempts":[33],"relied":[34],"on":[35,80,151,235],"user-directed":[36],"trial-and-error":[37],"achieve":[39],"convergence.":[40],"We":[41,106],"present":[42,107,211],"methods":[43,51,112,135,180],"that":[44,136,158],"relieve":[45],"users":[46],"from":[47,229],"this":[48],"responsibility.":[49],"Our":[50],"dynamically":[52],"adjust":[53,197],"the":[54,59,139,145,176,182,191,218],"size":[55],"format":[57],"floating-point":[60],"containers":[61],"used":[62],"for":[63,104],"activations":[64],"weights":[66],"training,":[68],"achieving":[69],"adaptivity":[70],"across":[71],"three":[72],"dimensions:":[73],"i)":[74],"which":[75,81],"datatype":[76],"use,":[78],"ii)":[79],"tensor,":[82],"iii)":[84],"how":[85],"it":[86],"changes":[87,189],"over":[88],"time.":[89],"different":[91],"meanings":[92],"distributions":[94],"exponent":[96,119,149,173,200,222],"mantissas":[98],"lead":[99],"us":[100],"tailored":[102],"approaches":[103],"each.":[105],"two":[108,177],"lossy":[109],"pairs":[110],"eliminate":[114],"as":[115,121],"many":[116,159],"mantissa":[117,147,167,198],"bits":[120,168],"possible":[122],"without":[123],"affecting":[124],"accuracy.":[125],"Quantum":[126,129,230],"Mantissa":[127],"Exponent":[130,231],"are":[131],"machine":[132,178],"learning":[133,179],"compression":[134,238],"tap":[137],"into":[138],"gradient":[140],"descent":[141],"algorithm":[142],"learn":[144,157],"minimal":[146],"bitlengths":[150,201],"a":[152,204],"per-layer":[153],"granularity.":[154],"They":[155],"automatically":[156],"can":[161],"just":[163],"1":[164],"or":[165,171,232],"2":[166],"3":[170],"4":[172],"bits.":[174],"Overall,":[175],"reduce":[181],"footprint":[183],"by":[184],"$4.74\\times$.":[185],"Alternatively,":[186],"BitWave":[187,233],"observes":[188],"in":[190,207],"loss":[192],"function":[193],"network-wide,":[202],"yielding":[203],"$3.19\\times$":[205],"reduction":[206],"footprint.":[208],"Finally,":[209],"we":[210],"an":[212],"optional":[213],"method,":[214],"Gecko,":[215],"exploit":[217],"naturally":[219],"emerging,":[220],"lop-sided":[221],"distribution":[223],"losslessly":[225],"compress":[226],"resulting":[227],"exponents":[228],"and,":[234],"average,":[236],"rates":[239],"$5.64\\times$":[241],"$4.56\\times$.":[243]},"counts_by_year":[],"updated_date":"2026-03-25T23:56:10.502304","created_date":"2022-05-01T00:00:00"}
