{"id":"https://openalex.org/W4319792209","doi":"https://doi.org/10.48550/arxiv.2302.03764","title":"Sketchy: Memory-efficient Adaptive Regularization with Frequent Directions","display_name":"Sketchy: Memory-efficient Adaptive Regularization with Frequent Directions","publication_year":2023,"publication_date":"2023-02-07","ids":{"openalex":"https://openalex.org/W4319792209","doi":"https://doi.org/10.48550/arxiv.2302.03764"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2302.03764","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2302.03764","pdf_url":"https://arxiv.org/pdf/2302.03764","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2302.03764","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5070286742","display_name":"Vladimir Feinberg","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Feinberg, Vladimir","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100457120","display_name":"Xinyi Chen","orcid":"https://orcid.org/0009-0003-5797-7111"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Xinyi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022792064","display_name":"Y. Jennifer Sun","orcid":"https://orcid.org/0009-0007-9680-4300"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Y. Jennifer","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104083306","display_name":"Rohan Anil","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Anil, Rohan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5024431603","display_name":"Elad Hazan","orcid":"https://orcid.org/0000-0002-1566-3216"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hazan, Elad","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5070286742"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10500","display_name":"Sparse and Compressive Sensing Techniques","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10500","display_name":"Sparse and Compressive Sensing Techniques","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9696000218391418,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11233","display_name":"Advanced Adaptive Filtering Techniques","score":0.9668999910354614,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5867825150489807},{"id":"https://openalex.org/keywords/regularization","display_name":"Regularization (linguistics)","score":0.5494847893714905},{"id":"https://openalex.org/keywords/eigenvalues-and-eigenvectors","display_name":"Eigenvalues and eigenvectors","score":0.43759724497795105},{"id":"https://openalex.org/keywords/adaptive-memory","display_name":"Adaptive memory","score":0.4251402020454407},{"id":"https://openalex.org/keywords/matrix-norm","display_name":"Matrix norm","score":0.42031610012054443},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.41697609424591064},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.39714837074279785},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3075377345085144},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2752659320831299}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5867825150489807},{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.5494847893714905},{"id":"https://openalex.org/C158693339","wikidata":"https://www.wikidata.org/wiki/Q190524","display_name":"Eigenvalues and eigenvectors","level":2,"score":0.43759724497795105},{"id":"https://openalex.org/C30390489","wikidata":"https://www.wikidata.org/wiki/Q4680748","display_name":"Adaptive memory","level":3,"score":0.4251402020454407},{"id":"https://openalex.org/C92207270","wikidata":"https://www.wikidata.org/wiki/Q939253","display_name":"Matrix norm","level":3,"score":0.42031610012054443},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.41697609424591064},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.39714837074279785},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3075377345085144},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2752659320831299},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C169900460","wikidata":"https://www.wikidata.org/wiki/Q2200417","display_name":"Cognition","level":2,"score":0.0},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2302.03764","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2302.03764","pdf_url":"https://arxiv.org/pdf/2302.03764","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2302.03764","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2302.03764","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2302.03764","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2302.03764","pdf_url":"https://arxiv.org/pdf/2302.03764","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"display_name":"Decent work and economic growth","id":"https://metadata.un.org/sdg/8","score":0.4000000059604645}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4319792209.pdf","grobid_xml":"https://content.openalex.org/works/W4319792209.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2062626603","https://openalex.org/W2165749285","https://openalex.org/W2474979212","https://openalex.org/W2562455930","https://openalex.org/W2001036795","https://openalex.org/W127450631","https://openalex.org/W2154748900","https://openalex.org/W1601247381","https://openalex.org/W1975743536","https://openalex.org/W3093811938"],"abstract_inverted_index":{"Adaptive":[0],"regularization":[1],"methods":[2],"that":[3,53],"exploit":[4],"more":[5],"than":[6],"the":[7,13,32,35,79,107,116,140,145],"diagonal":[8],"entries":[9],"exhibit":[10],"state":[11],"of":[12,25,34,73,144,152],"art":[14],"performance":[15],"for":[16,67,91,173],"many":[17],"tasks,":[18],"but":[19],"can":[20],"be":[21],"prohibitive":[22],"in":[23,40,109,115,139,158,162],"terms":[24],"memory":[26,69,129,134,172],"and":[27,70,106,166],"running":[28],"time.":[29],"We":[30,62],"find":[31],"spectra":[33],"Kronecker-factored":[36],"gradient":[37,146],"covariance":[38],"matrix":[39,76],"deep":[41],"learning":[42],"(DL)":[43],"training":[44],"tasks":[45],"are":[46],"concentrated":[47],"on":[48],"a":[49,58,64,75,96,159],"small":[50],"leading":[51],"eigenspace":[52],"changes":[54],"throughout":[55],"training,":[56],"motivating":[57],"low-rank":[59],"sketching":[60],"approach.":[61],"describe":[63],"generic":[65],"method":[66,160],"reducing":[68],"compute":[71],"requirements":[72,105],"maintaining":[74],"preconditioner":[77],"using":[78,131],"Frequent":[80],"Directions":[81],"(FD)":[82],"sketch.":[83],"While":[84],"previous":[85],"approaches":[86],"have":[87],"explored":[88],"applying":[89],"FD":[90],"second-order":[92],"optimization,":[93],"we":[94,125,149],"present":[95],"novel":[97],"analysis":[98],"which":[99],"allows":[100],"efficient":[101],"interpolation":[102],"between":[103],"resource":[104],"degradation":[108],"regret":[110,130],"guarantees":[111],"with":[112,164],"rank":[113],"$k$:":[114],"online":[117],"convex":[118],"optimization":[119],"(OCO)":[120],"setting":[121],"over":[122],"dimension":[123],"$d$,":[124],"match":[126],"full-matrix":[127],"$d^2$":[128],"only":[132,170],"$dk$":[133],"up":[135],"to":[136,155],"additive":[137],"error":[138],"bottom":[141],"$d-k$":[142],"eigenvalues":[143],"covariance.":[147],"Further,":[148],"show":[150],"extensions":[151],"our":[153],"work":[154],"Shampoo,":[156],"resulting":[157],"competitive":[161],"quality":[163],"Shampoo":[165],"Adam,":[167],"yet":[168],"requiring":[169],"sub-linear":[171],"tracking":[174],"second":[175],"moments.":[176]},"counts_by_year":[{"year":2023,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
