{"id":"https://openalex.org/W4320024179","doi":"https://doi.org/10.1109/bigdata55660.2022.10020262","title":"MBAG: A Scalable Mini-Block Adaptive Gradient Method for Deep Neural Networks","display_name":"MBAG: A Scalable Mini-Block Adaptive Gradient Method for Deep Neural Networks","publication_year":2022,"publication_date":"2022-12-17","ids":{"openalex":"https://openalex.org/W4320024179","doi":"https://doi.org/10.1109/bigdata55660.2022.10020262"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata55660.2022.10020262","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/bigdata55660.2022.10020262","pdf_url":null,"source":{"id":"https://openalex.org/S4363607709","display_name":"2022 IEEE International Conference on Big Data (Big Data)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100415736","display_name":"Jaewoo Lee","orcid":"https://orcid.org/0000-0002-7418-651X"},"institutions":[{"id":"https://openalex.org/I165733156","display_name":"University of Georgia","ror":"https://ror.org/00te3t702","country_code":"US","type":"education","lineage":["https://openalex.org/I165733156"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Jaewoo Lee","raw_affiliation_strings":["University of Georgia,Department of Computer Science,Athens,USA","Department of Computer Science, University of Georgia, Athens, USA"],"affiliations":[{"raw_affiliation_string":"University of Georgia,Department of Computer Science,Athens,USA","institution_ids":["https://openalex.org/I165733156"]},{"raw_affiliation_string":"Department of Computer Science, University of Georgia, Athens, USA","institution_ids":["https://openalex.org/I165733156"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5100415736"],"corresponding_institution_ids":["https://openalex.org/I165733156"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.20588235,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"9","issue":null,"first_page":"1286","last_page":"1291"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12676","display_name":"Machine Learning and ELM","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10500","display_name":"Sparse and Compressive Sensing Techniques","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6923363208770752},{"id":"https://openalex.org/keywords/preconditioner","display_name":"Preconditioner","score":0.6840639114379883},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.615619421005249},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.5559791326522827},{"id":"https://openalex.org/keywords/solver","display_name":"Solver","score":0.543487548828125},{"id":"https://openalex.org/keywords/computational-complexity-theory","display_name":"Computational complexity theory","score":0.4984874725341797},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.4841754734516144},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.45807573199272156},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.40574532747268677},{"id":"https://openalex.org/keywords/iterative-method","display_name":"Iterative method","score":0.3851465582847595},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.18436115980148315}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6923363208770752},{"id":"https://openalex.org/C167431342","wikidata":"https://www.wikidata.org/wiki/Q1754327","display_name":"Preconditioner","level":3,"score":0.6840639114379883},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.615619421005249},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.5559791326522827},{"id":"https://openalex.org/C2778770139","wikidata":"https://www.wikidata.org/wiki/Q1966904","display_name":"Solver","level":2,"score":0.543487548828125},{"id":"https://openalex.org/C179799912","wikidata":"https://www.wikidata.org/wiki/Q205084","display_name":"Computational complexity theory","level":2,"score":0.4984874725341797},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.4841754734516144},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.45807573199272156},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.40574532747268677},{"id":"https://openalex.org/C159694833","wikidata":"https://www.wikidata.org/wiki/Q2321565","display_name":"Iterative method","level":2,"score":0.3851465582847595},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.18436115980148315},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata55660.2022.10020262","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/bigdata55660.2022.10020262","pdf_url":null,"source":{"id":"https://openalex.org/S4363607709","display_name":"2022 IEEE International Conference on Big Data (Big Data)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":48,"referenced_works":["https://openalex.org/W196761320","https://openalex.org/W1483804921","https://openalex.org/W1522301498","https://openalex.org/W1988720110","https://openalex.org/W1994616650","https://openalex.org/W2051434435","https://openalex.org/W2078394884","https://openalex.org/W2100495367","https://openalex.org/W2107438106","https://openalex.org/W2129160848","https://openalex.org/W2146502635","https://openalex.org/W2155894447","https://openalex.org/W2166347285","https://openalex.org/W2167729035","https://openalex.org/W2194775991","https://openalex.org/W2625567132","https://openalex.org/W2777827707","https://openalex.org/W2788190072","https://openalex.org/W2886837499","https://openalex.org/W2936995161","https://openalex.org/W2951840540","https://openalex.org/W2952376685","https://openalex.org/W2964303576","https://openalex.org/W2972872797","https://openalex.org/W3035182906","https://openalex.org/W3037301072","https://openalex.org/W3101098636","https://openalex.org/W3211402766","https://openalex.org/W4221139658","https://openalex.org/W6608133726","https://openalex.org/W6631190155","https://openalex.org/W6676105031","https://openalex.org/W6681435938","https://openalex.org/W6683107984","https://openalex.org/W6684497637","https://openalex.org/W6729495890","https://openalex.org/W6739785051","https://openalex.org/W6746885453","https://openalex.org/W6748982233","https://openalex.org/W6753892653","https://openalex.org/W6756702221","https://openalex.org/W6761030284","https://openalex.org/W6762990414","https://openalex.org/W6767732602","https://openalex.org/W6779861650","https://openalex.org/W6780665282","https://openalex.org/W6803653593","https://openalex.org/W6810167063"],"related_works":["https://openalex.org/W2955975433","https://openalex.org/W4288309012","https://openalex.org/W1970414682","https://openalex.org/W1513752583","https://openalex.org/W3013286738","https://openalex.org/W3045572928","https://openalex.org/W4226023416","https://openalex.org/W2212236609","https://openalex.org/W2507702484","https://openalex.org/W2155328232"],"abstract_inverted_index":{"Preconditioning":[0],"is":[1,45,150],"a":[2,61],"technique":[3],"widely":[4],"used":[5],"to":[6,49,111,152],"accelerate":[7],"the":[8,24,27,50,78,83,89,95,113,140,143],"convergence":[9],"of":[10,30,91,122,129,135,142,148,154],"optimization":[11],"algorithms.":[12],"Recently":[13],"proposed":[14,144],"efficient":[15],"second-order":[16,158],"algorithms":[17,159],"(such":[18],"as":[19],"KFAC)":[20],"showed":[21],"that":[22,71,153],"preconditioning":[23,79,92],"gradient":[25,64],"using":[26,94,106,165],"curvature":[28],"information":[29],"loss":[31],"function":[32],"can":[33],"help":[34],"achieve":[35],"faster":[36],"convergence.":[37],"However,":[38],"their":[39],"practicality":[40],"in":[41,76],"large-scale":[42],"deep":[43],"learning":[44],"still":[46],"limited":[47],"due":[48],"high":[51],"computational":[52,74],"and":[53,99,125,157,162],"storage":[54,114],"cost.":[55],"In":[56],"this":[57],"work,":[58],"we":[59],"propose":[60],"stochastic":[62],"adaptive":[63],"algorithm,":[65],"called":[66],"Mini-Block":[67],"Adaptive":[68],"Gradient":[69],"(MBAG),":[70],"addresses":[72],"those":[73],"challenges":[75],"computing":[77],"matrix.":[80],"To":[81],"reduce":[82],"per-iteration":[84],"cost,":[85],"MBAG":[86,116,149],"analytically":[87],"computes":[88,127],"inverse":[90],"matrix":[93,96],"inversion":[97],"lemma":[98],"then":[100],"approximately":[101],"finds":[102],"its":[103],"square":[104],"root":[105],"an":[107],"iterative":[108],"solver.":[109],"Further,":[110],"mitigate":[112],"requirement,":[115],"partitions":[117],"model":[118],"parameters":[119],"into":[120],"subsets":[121],"small":[123],"size":[124],"only":[126],"sub-blocks":[128],"preconditioner":[130],"associated":[131],"with":[132],"each":[133],"subset":[134],"parameters.":[136],"This":[137],"greatly":[138],"improves":[139],"scalability":[141],"algorithm.":[145],"The":[146],"performance":[147],"compared":[151],"popular":[155],"first-":[156],"on":[160],"auto-encoder":[161],"classification":[163],"tasks":[164],"real":[166],"datasets.":[167]},"counts_by_year":[],"updated_date":"2025-12-21T01:58:51.020947","created_date":"2025-10-10T00:00:00"}
