{"id":"https://openalex.org/W3012674152","doi":"https://doi.org/10.1109/bigdata50022.2020.9378212","title":"Improving Model Training by Periodic Sampling over Weight Distributions","display_name":"Improving Model Training by Periodic Sampling over Weight Distributions","publication_year":2020,"publication_date":"2020-12-10","ids":{"openalex":"https://openalex.org/W3012674152","doi":"https://doi.org/10.1109/bigdata50022.2020.9378212","mag":"3012674152"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata50022.2020.9378212","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata50022.2020.9378212","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1905.05774","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5001997489","display_name":"Samarth Tripathi","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Samarth Tripathi","raw_affiliation_strings":["LG Advanced AI Lab, Santa Clara, CA, USA"],"affiliations":[{"raw_affiliation_string":"LG Advanced AI Lab, Santa Clara, CA, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":null,"display_name":"Jiayi Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiayi Liu","raw_affiliation_strings":["LG Advanced AI Lab, Santa Clara, CA, USA"],"affiliations":[{"raw_affiliation_string":"LG Advanced AI Lab, Santa Clara, CA, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058450118","display_name":"Sauptik Dhar","orcid":"https://orcid.org/0000-0002-3555-5466"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sauptik Dhar","raw_affiliation_strings":["LG Advanced AI Lab, Santa Clara, CA, USA"],"affiliations":[{"raw_affiliation_string":"LG Advanced AI Lab, Santa Clara, CA, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057706634","display_name":"Unmesh Kurup","orcid":"https://orcid.org/0000-0002-3427-0418"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Unmesh Kurup","raw_affiliation_strings":["LG Advanced AI Lab, Santa Clara, CA, USA"],"affiliations":[{"raw_affiliation_string":"LG Advanced AI Lab, Santa Clara, CA, USA","institution_ids":[]}]},{"author_position":"last","author":{"id":null,"display_name":"Mohak Shah","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mohak Shah","raw_affiliation_strings":["LG Advanced AI Lab, Santa Clara, CA, USA"],"affiliations":[{"raw_affiliation_string":"LG Advanced AI Lab, Santa Clara, CA, USA","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5001997489"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.05960452,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"4","issue":null,"first_page":"112","last_page":"122"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13497","display_name":"Hermeneutics and Narrative Identity","score":0.9879000186920166,"subfield":{"id":"https://openalex.org/subfields/1211","display_name":"Philosophy"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T13497","display_name":"Hermeneutics and Narrative Identity","score":0.9879000186920166,"subfield":{"id":"https://openalex.org/subfields/1211","display_name":"Philosophy"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T13695","display_name":"Aging, Elder Care, and Social Issues","score":0.9749000072479248,"subfield":{"id":"https://openalex.org/subfields/3600","display_name":"General Health Professions"},"field":{"id":"https://openalex.org/fields/36","display_name":"Health Professions"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T13099","display_name":"Health, Medicine and Society","score":0.95660001039505,"subfield":{"id":"https://openalex.org/subfields/3600","display_name":"General Health Professions"},"field":{"id":"https://openalex.org/fields/36","display_name":"Health Professions"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.7812786102294922},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6922146081924438},{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.6768233180046082},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.6367486715316772},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.6226383447647095},{"id":"https://openalex.org/keywords/monotonic-function","display_name":"Monotonic function","score":0.6059898734092712},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5741380453109741},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5542633533477783},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5518833994865417},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.5516669154167175},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.5110303163528442},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.47314250469207764},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.36851513385772705},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.16377544403076172},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.06839373707771301}],"concepts":[{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.7812786102294922},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6922146081924438},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.6768233180046082},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.6367486715316772},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.6226383447647095},{"id":"https://openalex.org/C72169020","wikidata":"https://www.wikidata.org/wiki/Q194404","display_name":"Monotonic function","level":2,"score":0.6059898734092712},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5741380453109741},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5542633533477783},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5518833994865417},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.5516669154167175},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.5110303163528442},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.47314250469207764},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.36851513385772705},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.16377544403076172},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.06839373707771301},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/bigdata50022.2020.9378212","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata50022.2020.9378212","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1905.05774","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1905.05774","pdf_url":"https://arxiv.org/pdf/1905.05774","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.1905.05774","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1905.05774","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1905.05774","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1905.05774","pdf_url":"https://arxiv.org/pdf/1905.05774","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W1516903196","https://openalex.org/W1861492603","https://openalex.org/W2080873731","https://openalex.org/W2117539524","https://openalex.org/W2146502635","https://openalex.org/W2151695970","https://openalex.org/W2156779765","https://openalex.org/W2183341477","https://openalex.org/W2194775991","https://openalex.org/W2483727386","https://openalex.org/W2507296351","https://openalex.org/W2737258237","https://openalex.org/W2766164908","https://openalex.org/W2776855315","https://openalex.org/W2810784658","https://openalex.org/W2884822772","https://openalex.org/W2912811302","https://openalex.org/W2949117887","https://openalex.org/W2962933129","https://openalex.org/W2963016543","https://openalex.org/W2963163009","https://openalex.org/W2963173418","https://openalex.org/W2963402313","https://openalex.org/W2963446085","https://openalex.org/W2963446712","https://openalex.org/W2964121744","https://openalex.org/W3118608800","https://openalex.org/W6630841318","https://openalex.org/W6631190155","https://openalex.org/W6638667902","https://openalex.org/W6681435938","https://openalex.org/W6713348437","https://openalex.org/W6787972765"],"related_works":["https://openalex.org/W3137281154","https://openalex.org/W2609701267","https://openalex.org/W83651405","https://openalex.org/W3164074902","https://openalex.org/W2892283076","https://openalex.org/W2621265919","https://openalex.org/W2621365674","https://openalex.org/W2054157877","https://openalex.org/W3157623825","https://openalex.org/W3033041687","https://openalex.org/W3042818515","https://openalex.org/W2893470888","https://openalex.org/W3037064402","https://openalex.org/W2981924372","https://openalex.org/W2913830934","https://openalex.org/W3128963076","https://openalex.org/W2185722686","https://openalex.org/W3161019670","https://openalex.org/W3098430511","https://openalex.org/W3162974929"],"abstract_inverted_index":{"Optimizing":[0],"deep":[1],"neural":[2,69,145],"networks":[3,70],"using":[4],"gradient-based":[5],"techniques":[6,85,140,185],"such":[7],"as":[8],"SGD":[9],"and":[10,23,64,77,123,127,151,156,180],"Adam":[11],"have":[12],"some":[13],"well":[14],"documented":[15],"problems":[16,35,108],"including":[17],"the":[18,45,117,144],"high":[19],"volatility,":[20],"low":[21],"stability":[22,76],"non-monotonicity":[24],"of":[25,90,106,143,174],"performance":[26,129,164],"improvements":[27,165,179],"during":[28],"training.":[29],"One":[30],"approach":[31],"that":[32,93,166],"addresses":[33],"these":[34,139,178,184],"in":[36,135,158],"convex":[37],"settings":[38,63],"is":[39],"Polyak-Ruppert":[40],"Averaging":[41],"(PRA)":[42],"which":[43],"averages":[44],"model":[46,91],"weights":[47,92],"distributions":[48],"over":[49],"gradient":[50,96,148],"updates.":[51],"But":[52],"this":[53,81],"technique":[54],"(and":[55],"its":[56],"variants)":[57],"do":[58],"not":[59],"scale":[60],"for":[61,67,103],"non-convex":[62],"their":[65],"use":[66],"large":[68],"involves":[71],"a":[72,104,132,159,172],"tradeoff":[73],"between":[74],"increased":[75],"optimal":[78,153],"convergence.":[79],"In":[80],"paper,":[82],"we":[83],"introduce":[84],"centered":[86],"around":[87],"periodic":[88],"sampling":[89],"improve":[94],"upon":[95],"update":[97],"methods":[98,150],"(minibatch":[99],"SGD,":[100],"Momentum,":[101],"Adam)":[102],"variety":[105,173],"vision":[107],"(classification,":[109],"detection,":[110],"segmentation).":[111],"Compared":[112],"to":[113,176],"existing":[114,152],"PRA":[115],"approaches,":[116],"proposed":[118],"algorithms":[119],"provide":[120],"better,":[121],"faster":[122],"more":[124,188],"robust":[125],"convergence":[126],"training":[128,154],"with":[130,163],"only":[131],"slight":[133],"increase":[134],"computation":[136],"time.":[137],"Importantly,":[138],"are":[141,167],"independent":[142],"network":[146],"model,":[147],"optimization":[149],"policies,":[155],"converge":[157],"less":[160],"volatile":[161],"fashion":[162],"approximately":[168],"monotonic.":[169],"We":[170],"conduct":[171],"experiments":[175],"quantify":[177],"identify":[181],"scenarios":[182],"where":[183],"could":[186],"be":[187],"useful.":[189]},"counts_by_year":[],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2020-03-27T00:00:00"}
