{"id":"https://openalex.org/W2911920332","doi":"https://doi.org/10.1109/isit.2019.8849667","title":"Fitting ReLUs via SGD and Quantized SGD","display_name":"Fitting ReLUs via SGD and Quantized SGD","publication_year":2019,"publication_date":"2019-07-01","ids":{"openalex":"https://openalex.org/W2911920332","doi":"https://doi.org/10.1109/isit.2019.8849667","mag":"2911920332"},"language":"en","primary_location":{"id":"doi:10.1109/isit.2019.8849667","is_oa":false,"landing_page_url":"https://doi.org/10.1109/isit.2019.8849667","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 IEEE International Symposium on Information Theory (ISIT)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1901.06587","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5035116783","display_name":"Seyed Mohammadreza Mousavi Kalan","orcid":null},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]},{"id":"https://openalex.org/I2800817003","display_name":"California Southern University","ror":"https://ror.org/058zz0t50","country_code":"US","type":"education","lineage":["https://openalex.org/I2800817003"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Seyed Mohammadreza Mousavi Kalan","raw_affiliation_strings":["Ming Hsieh Department of Electrical Engineering, University of Southern, California, CA, USA","University  of Southern California"],"affiliations":[{"raw_affiliation_string":"Ming Hsieh Department of Electrical Engineering, University of Southern, California, CA, USA","institution_ids":["https://openalex.org/I1174212"]},{"raw_affiliation_string":"University  of Southern California","institution_ids":["https://openalex.org/I2800817003","https://openalex.org/I1174212"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046962187","display_name":"Mahdi Soltanolkotabi","orcid":"https://orcid.org/0000-0003-2101-6418"},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]},{"id":"https://openalex.org/I2800817003","display_name":"California Southern University","ror":"https://ror.org/058zz0t50","country_code":"US","type":"education","lineage":["https://openalex.org/I2800817003"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mahdi Soltanolkotabi","raw_affiliation_strings":["Ming Hsieh Department of Electrical Engineering, University of Southern, California, CA, USA","University  of Southern California"],"affiliations":[{"raw_affiliation_string":"Ming Hsieh Department of Electrical Engineering, University of Southern, California, CA, USA","institution_ids":["https://openalex.org/I1174212"]},{"raw_affiliation_string":"University  of Southern California","institution_ids":["https://openalex.org/I2800817003","https://openalex.org/I1174212"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5047191296","display_name":"A. Salman Avestimehr","orcid":"https://orcid.org/0000-0003-3102-0867"},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]},{"id":"https://openalex.org/I2800817003","display_name":"California Southern University","ror":"https://ror.org/058zz0t50","country_code":"US","type":"education","lineage":["https://openalex.org/I2800817003"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"A. Salman Avestimehr","raw_affiliation_strings":["Ming Hsieh Department of Electrical Engineering, University of Southern, California, CA, USA","University  of Southern California"],"affiliations":[{"raw_affiliation_string":"Ming Hsieh Department of Electrical Engineering, University of Southern, California, CA, USA","institution_ids":["https://openalex.org/I1174212"]},{"raw_affiliation_string":"University  of Southern California","institution_ids":["https://openalex.org/I2800817003","https://openalex.org/I1174212"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5035116783"],"corresponding_institution_ids":["https://openalex.org/I1174212","https://openalex.org/I2800817003"],"apc_list":null,"apc_paid":null,"fwci":0.2894,"has_fulltext":true,"cited_by_count":3,"citation_normalized_percentile":{"value":0.64454552,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"2469","last_page":"2473"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/stochastic-gradient-descent","display_name":"Stochastic gradient descent","score":0.7994123697280884},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.6488264799118042},{"id":"https://openalex.org/keywords/gaussian","display_name":"Gaussian","score":0.5851122736930847},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.535896897315979},{"id":"https://openalex.org/keywords/gradient-descent","display_name":"Gradient descent","score":0.47682255506515503},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.4729746878147125},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.43083009123802185},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.42033445835113525},{"id":"https://openalex.org/keywords/scheme","display_name":"Scheme (mathematics)","score":0.4143451750278473},{"id":"https://openalex.org/keywords/applied-mathematics","display_name":"Applied mathematics","score":0.40820974111557007},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.37445223331451416},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.37404730916023254},{"id":"https://openalex.org/keywords/combinatorics","display_name":"Combinatorics","score":0.3556315302848816},{"id":"https://openalex.org/keywords/mathematical-analysis","display_name":"Mathematical analysis","score":0.22525247931480408},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.1887577772140503},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.14112794399261475}],"concepts":[{"id":"https://openalex.org/C206688291","wikidata":"https://www.wikidata.org/wiki/Q7617819","display_name":"Stochastic gradient descent","level":3,"score":0.7994123697280884},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.6488264799118042},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.5851122736930847},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.535896897315979},{"id":"https://openalex.org/C153258448","wikidata":"https://www.wikidata.org/wiki/Q1199743","display_name":"Gradient descent","level":3,"score":0.47682255506515503},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.4729746878147125},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.43083009123802185},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.42033445835113525},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.4143451750278473},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.40820974111557007},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.37445223331451416},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.37404730916023254},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.3556315302848816},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.22525247931480408},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.1887577772140503},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.14112794399261475},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/isit.2019.8849667","is_oa":false,"landing_page_url":"https://doi.org/10.1109/isit.2019.8849667","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 IEEE International Symposium on Information Theory (ISIT)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1901.06587","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1901.06587","pdf_url":"https://arxiv.org/pdf/1901.06587","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"mag:2911920332","is_oa":true,"landing_page_url":"https://arxiv.org/abs/1901.06587","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.1901.06587","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1901.06587","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1901.06587","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1901.06587","pdf_url":"https://arxiv.org/pdf/1901.06587","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W2911920332.pdf"},"referenced_works_count":41,"referenced_works":["https://openalex.org/W178986290","https://openalex.org/W809736386","https://openalex.org/W1551360398","https://openalex.org/W2001032894","https://openalex.org/W2048305372","https://openalex.org/W2082916428","https://openalex.org/W2117130368","https://openalex.org/W2156779765","https://openalex.org/W2162287622","https://openalex.org/W2163605009","https://openalex.org/W2168231600","https://openalex.org/W2407022425","https://openalex.org/W2558384550","https://openalex.org/W2755553805","https://openalex.org/W2769644379","https://openalex.org/W2785631679","https://openalex.org/W2786602455","https://openalex.org/W2789138075","https://openalex.org/W2794854061","https://openalex.org/W2798418464","https://openalex.org/W2809670082","https://openalex.org/W2885208219","https://openalex.org/W2901602790","https://openalex.org/W2952817981","https://openalex.org/W2962712496","https://openalex.org/W2963112712","https://openalex.org/W2963827833","https://openalex.org/W2963877580","https://openalex.org/W2966289053","https://openalex.org/W2975778934","https://openalex.org/W3124524204","https://openalex.org/W6684859321","https://openalex.org/W6737871451","https://openalex.org/W6738373677","https://openalex.org/W6748019269","https://openalex.org/W6748519251","https://openalex.org/W6749431339","https://openalex.org/W6752256394","https://openalex.org/W6752544856","https://openalex.org/W6753219657","https://openalex.org/W6754930508"],"related_works":["https://openalex.org/W2975778934","https://openalex.org/W2613481513","https://openalex.org/W3109699007","https://openalex.org/W2973096819","https://openalex.org/W3100770245","https://openalex.org/W3209632558","https://openalex.org/W3127328563","https://openalex.org/W2301987905","https://openalex.org/W2963834323","https://openalex.org/W2791562219","https://openalex.org/W3007224040","https://openalex.org/W2789912105","https://openalex.org/W2964271537","https://openalex.org/W1511263746","https://openalex.org/W3046601824","https://openalex.org/W2946840143","https://openalex.org/W3026733282","https://openalex.org/W2770027754","https://openalex.org/W2901741727","https://openalex.org/W2947027751"],"abstract_inverted_index":{"In":[0],"this":[1,148],"paper":[2],"we":[3,110,150,166],"focus":[4,50,111],"on":[5,51,112],"the":[6,10,14,31,46,57,67,94,120,144,159,171,184],"problem":[7],"of":[8,13,16,20,30,100,174],"finding":[9],"optimal":[11,104,179],"weights":[12],"shallowest":[15],"neural":[17],"networks":[18],"consisting":[19],"a":[21,52,63,73,90,98,113,126,136,152,177],"single":[22],"Rectified":[23],"Linear":[24],"Unit":[25],"(ReLU).":[26],"These":[27],"functions":[28],"are":[29,59,69,162],"form":[32],"x":[33],"\u2192":[34],"max(0,":[35],"\u3008w,":[36],"x\u3009)":[37],"with":[38,97],"w":[39],"\u2208":[40],"\u211d":[41],"<sup":[42],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[43],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">d</sup>":[44],"denoting":[45],"weight":[47,75],"vector.":[48,76],"We":[49,77,187],"planted":[53,74,95],"i":[54],"model":[55,96,180],"where":[56,116,158],"inputs":[58],"chosen":[60],"i.i.d.":[61],"from":[62],"Gaussian":[64],"distribution":[65],"and":[66,132],"labels":[68],"generated":[70],"according":[71],"to":[72,93,106,135,176],"first":[78],"show":[79,167],"that":[80,102,168],"mini-batch":[81,121],"stochastic":[82],"gradient":[83,122],"descent":[84],"when":[85],"suitably":[86],"initialized,":[87],"converges":[88],"at":[89],"geometric":[91],"rate":[92],"number":[99],"samples":[101],"is":[103,123],"up":[105],"numerical":[107,191],"constants.":[108],"Next":[109],"parallel":[114],"implementation":[115],"in":[117,125,147],"each":[118],"iteration":[119],"calculated":[124],"distributed":[127,197],"manner":[128],"across":[129],"multiple":[130],"processors":[131],"then":[133],"broadcast":[134],"master":[137],"or":[138],"all":[139],"other":[140],"processors.":[141],"To":[142],"reduce":[143],"communication":[145,185],"cost":[146],"setting":[149],"utilize":[151],"Quanitzed":[153],"Stochastic":[154],"Gradient":[155],"Scheme":[156],"(QSGD)":[157],"partial":[160],"gradients":[161],"quantized.":[163],"Perhaps":[164],"unexpectedly,":[165],"QSGD":[169],"maintains":[170],"fast":[172],"convergence":[173],"SGD":[175],"globally":[178],"while":[181],"significantly":[182],"reducing":[183],"cost.":[186],"further":[188],"corroborate":[189],"our":[190],"findings":[192],"via":[193],"various":[194],"experiments":[195],"including":[196],"implementations":[198],"over":[199],"Amazon":[200],"EC2.":[201]},"counts_by_year":[{"year":2020,"cited_by_count":2},{"year":2018,"cited_by_count":1}],"updated_date":"2026-03-25T13:04:00.132906","created_date":"2025-10-10T00:00:00"}
