{"id":"https://openalex.org/W3130711370","doi":"https://doi.org/10.1145/3437801.3441624","title":"Dynamic scaling for low-precision learning","display_name":"Dynamic scaling for low-precision learning","publication_year":2021,"publication_date":"2021-02-17","ids":{"openalex":"https://openalex.org/W3130711370","doi":"https://doi.org/10.1145/3437801.3441624","mag":"3130711370"},"language":"en","primary_location":{"id":"doi:10.1145/3437801.3441624","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3437801.3441624","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 26th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5045920192","display_name":"Ruobing Han","orcid":"https://orcid.org/0000-0002-3090-3951"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Ruobing Han","raw_affiliation_strings":["Peking University"],"affiliations":[{"raw_affiliation_string":"Peking University","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058015880","display_name":"Min Si","orcid":"https://orcid.org/0000-0002-0208-096X"},"institutions":[{"id":"https://openalex.org/I1282105669","display_name":"Argonne National Laboratory","ror":"https://ror.org/05gvnxz63","country_code":"US","type":"facility","lineage":["https://openalex.org/I1282105669","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I40347166"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Min Si","raw_affiliation_strings":["Argonne National Laboratory"],"affiliations":[{"raw_affiliation_string":"Argonne National Laboratory","institution_ids":["https://openalex.org/I1282105669"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076825233","display_name":"James Demmel","orcid":"https://orcid.org/0000-0003-1145-3745"},"institutions":[{"id":"https://openalex.org/I134446601","display_name":"Berkeley College","ror":"https://ror.org/02xewxa75","country_code":"US","type":"education","lineage":["https://openalex.org/I134446601"]},{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"James Demmel","raw_affiliation_strings":["UC Berkeley"],"affiliations":[{"raw_affiliation_string":"UC Berkeley","institution_ids":["https://openalex.org/I134446601","https://openalex.org/I95457486"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100658705","display_name":"Yang You","orcid":"https://orcid.org/0000-0003-2816-4384"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Yang You","raw_affiliation_strings":["National University of Singapore"],"affiliations":[{"raw_affiliation_string":"National University of Singapore","institution_ids":["https://openalex.org/I165932596"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5045920192"],"corresponding_institution_ids":["https://openalex.org/I20231570"],"apc_list":null,"apc_paid":null,"fwci":0.1921,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.45482026,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"480","last_page":"482"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8653771877288818},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.7361335754394531},{"id":"https://openalex.org/keywords/synchronizing","display_name":"Synchronizing","score":0.6577458381652832},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.5828136205673218},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.5806939005851746},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4940660297870636},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.4709499776363373},{"id":"https://openalex.org/keywords/bandwidth","display_name":"Bandwidth (computing)","score":0.46907538175582886},{"id":"https://openalex.org/keywords/floating-point","display_name":"Floating point","score":0.46195554733276367},{"id":"https://openalex.org/keywords/point","display_name":"Point (geometry)","score":0.4519568979740143},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.4485079050064087},{"id":"https://openalex.org/keywords/scaling","display_name":"Scaling","score":0.44129347801208496},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4283798336982727},{"id":"https://openalex.org/keywords/real-time-computing","display_name":"Real-time computing","score":0.352411150932312},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.28337374329566956},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.1351548135280609},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.1306450366973877},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.08200374245643616}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8653771877288818},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.7361335754394531},{"id":"https://openalex.org/C162932704","wikidata":"https://www.wikidata.org/wiki/Q1058791","display_name":"Synchronizing","level":3,"score":0.6577458381652832},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.5828136205673218},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.5806939005851746},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4940660297870636},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.4709499776363373},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.46907538175582886},{"id":"https://openalex.org/C84211073","wikidata":"https://www.wikidata.org/wiki/Q117879","display_name":"Floating point","level":2,"score":0.46195554733276367},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.4519568979740143},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.4485079050064087},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.44129347801208496},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4283798336982727},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.352411150932312},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.28337374329566956},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.1351548135280609},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.1306450366973877},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.08200374245643616},{"id":"https://openalex.org/C761482","wikidata":"https://www.wikidata.org/wiki/Q118093","display_name":"Transmission (telecommunications)","level":2,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3437801.3441624","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3437801.3441624","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 26th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","score":0.5799999833106995,"display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":4,"referenced_works":["https://openalex.org/W2622263826","https://openalex.org/W2763421725","https://openalex.org/W2962747323","https://openalex.org/W2994144272"],"related_works":["https://openalex.org/W2359328895","https://openalex.org/W2363535981","https://openalex.org/W2058965144","https://openalex.org/W2164382479","https://openalex.org/W2007402811","https://openalex.org/W2326762630","https://openalex.org/W2391354408","https://openalex.org/W2891987081","https://openalex.org/W4280610722","https://openalex.org/W3185228140"],"abstract_inverted_index":{"In":[0,49],"recent":[1],"years,":[2],"distributed":[3,18],"deep":[4],"learning":[5],"is":[6,40],"becoming":[7],"popular":[8],"in":[9],"industry":[10],"and":[11,95,161,165,193],"academia.":[12],"Although":[13],"researchers":[14,160],"want":[15],"to":[16,135,159,197],"use":[17],"systems":[19],"for":[20,30,44,78,91,170],"training,":[21],"it":[22,157,195],"has":[23],"been":[24],"reported":[25],"that":[26,60,90,144],"the":[27,46,63,76,101,137,152,178,198],"communication":[28,84],"cost":[29],"synchronizing":[31],"gradients":[32,39,68,107],"can":[33,61,74,99,119,146,176],"be":[34],"a":[35,41,82,112,126,132,148,167],"bottleneck.":[36],"Using":[37],"low-precision":[38,70,184],"promising":[42],"technique":[43],"reducing":[45],"bandwidth":[47],"requirement.":[48],"this":[50],"work,":[51],"we":[52,66,118,130,163],"propose":[53,131],"Auto":[54],"Precision":[55],"Scaling":[56],"(APS),":[57],"an":[58,182],"algorithm":[59],"improve":[62,75],"accuracy":[64,77,114,122],"when":[65],"communicate":[67],"by":[69,104,124],"floating-point":[71,106,186],"values.":[72],"APS":[73,98,145],"all":[79],"precisions":[80],"with":[81,108],"trivial":[83],"cost.":[85],"Our":[86,140],"experimental":[87,141],"results":[88,142],"show":[89,143],"both":[92],"image":[93],"classification":[94],"segmentation,":[96],"applying":[97],"train":[100],"state-of-the-art":[102,153],"models":[103],"8-bit":[105],"no":[109],"or":[110],"only":[111],"tiny":[113],"loss":[115,123],"(<0.05%).":[116],"Furthermore,":[117],"avoid":[120],"any":[121],"designing":[125],"hybrid-precision":[127],"technique.":[128],"Finally,":[129],"performance":[133],"model":[134],"evaluate":[136],"proposed":[138],"method.":[139,154],"get":[147],"significant":[149],"speedup":[150],"over":[151],"To":[155],"make":[156,194],"available":[158],"developers,":[162],"design":[164],"implement":[166],"high-performance":[168],"system":[169],"customized":[171,185],"precision":[172],"Deep":[173],"Learning(CPD),":[174],"which":[175],"simulate":[177],"training":[179],"process":[180],"using":[181],"arbitrary":[183],"format.":[187],"We":[188],"integrate":[189],"CPD":[190],"into":[191],"PyTorch":[192],"open-source":[196],"public1.":[199]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2022,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
