{"id":"https://openalex.org/W2788909682","doi":"https://doi.org/10.1162/neco_a_01164","title":"Gradient Descent with Identity Initialization Efficiently Learns Positive-Definite Linear Transformations by Deep Residual Networks","display_name":"Gradient Descent with Identity Initialization Efficiently Learns Positive-Definite Linear Transformations by Deep Residual Networks","publication_year":2019,"publication_date":"2019-01-15","ids":{"openalex":"https://openalex.org/W2788909682","doi":"https://doi.org/10.1162/neco_a_01164","mag":"2788909682","pmid":"https://pubmed.ncbi.nlm.nih.gov/30645179"},"language":"en","primary_location":{"id":"doi:10.1162/neco_a_01164","is_oa":true,"landing_page_url":"https://doi.org/10.1162/neco_a_01164","pdf_url":"https://www.mitpressjournals.org/doi/pdf/10.1162/neco_a_01164","source":{"id":"https://openalex.org/S207023548","display_name":"Neural Computation","issn_l":"0899-7667","issn":["0899-7667","1530-888X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310315718","host_organization_name":"The MIT Press","host_organization_lineage":["https://openalex.org/P4310315718"],"host_organization_lineage_names":["The MIT Press"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Neural Computation","raw_type":"journal-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite","pubmed"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://www.mitpressjournals.org/doi/pdf/10.1162/neco_a_01164","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5030261391","display_name":"Peter L. Bartlett","orcid":"https://orcid.org/0000-0002-8760-3140"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Peter L. Bartlett","raw_affiliation_strings":["Department of Statistics, University of California, Berkeley, Berkeley, CA 94720-3860, U.S.A"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Statistics, University of California, Berkeley, Berkeley, CA 94720-3860, U.S.A","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014740431","display_name":"David P. Helmbold","orcid":null},"institutions":[{"id":"https://openalex.org/I185103710","display_name":"University of California, Santa Cruz","ror":"https://ror.org/03s65by71","country_code":"US","type":"education","lineage":["https://openalex.org/I185103710"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"David P. Helmbold","raw_affiliation_strings":["Computer Science Department, University of California Santa Cruz, Santa Cruz, CA 95064, U.S.A"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Computer Science Department, University of California Santa Cruz, Santa Cruz, CA 95064, U.S.A","institution_ids":["https://openalex.org/I185103710"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5084487318","display_name":"Philip M. Long","orcid":"https://orcid.org/0000-0002-1010-6197"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Philip M. Long","raw_affiliation_strings":["Google, Mountain View, CA 94043, U.S.A"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Google, Mountain View, CA 94043, U.S.A","institution_ids":["https://openalex.org/I1291425158"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5030261391"],"corresponding_institution_ids":["https://openalex.org/I95457486"],"apc_list":null,"apc_paid":null,"fwci":0.7254,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.77630308,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":"31","issue":"3","first_page":"477","last_page":"502"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10500","display_name":"Sparse and Compressive Sensing Techniques","score":0.9904999732971191,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10792","display_name":"Matrix Theory and Algorithms","score":0.987500011920929,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/identity-matrix","display_name":"Identity matrix","score":0.7144054770469666},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.6528993844985962},{"id":"https://openalex.org/keywords/positive-definite-matrix","display_name":"Positive-definite matrix","score":0.6454637050628662},{"id":"https://openalex.org/keywords/gradient-descent","display_name":"Gradient descent","score":0.618870198726654},{"id":"https://openalex.org/keywords/combinatorics","display_name":"Combinatorics","score":0.5932972431182861},{"id":"https://openalex.org/keywords/eigenvalues-and-eigenvectors","display_name":"Eigenvalues and eigenvectors","score":0.5661169290542603},{"id":"https://openalex.org/keywords/parameterized-complexity","display_name":"Parameterized complexity","score":0.5625224709510803},{"id":"https://openalex.org/keywords/initialization","display_name":"Initialization","score":0.5504067540168762},{"id":"https://openalex.org/keywords/constant","display_name":"Constant (computer programming)","score":0.44998759031295776},{"id":"https://openalex.org/keywords/polynomial","display_name":"Polynomial","score":0.4490073323249817},{"id":"https://openalex.org/keywords/bounded-function","display_name":"Bounded function","score":0.4150528013706207},{"id":"https://openalex.org/keywords/discrete-mathematics","display_name":"Discrete mathematics","score":0.37012267112731934},{"id":"https://openalex.org/keywords/mathematical-analysis","display_name":"Mathematical analysis","score":0.22363528609275818},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.20355165004730225},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.15553224086761475},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.10933828353881836},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.08904767036437988}],"concepts":[{"id":"https://openalex.org/C134567657","wikidata":"https://www.wikidata.org/wiki/Q193794","display_name":"Identity matrix","level":3,"score":0.7144054770469666},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.6528993844985962},{"id":"https://openalex.org/C49712288","wikidata":"https://www.wikidata.org/wiki/Q77601250","display_name":"Positive-definite matrix","level":3,"score":0.6454637050628662},{"id":"https://openalex.org/C153258448","wikidata":"https://www.wikidata.org/wiki/Q1199743","display_name":"Gradient descent","level":3,"score":0.618870198726654},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.5932972431182861},{"id":"https://openalex.org/C158693339","wikidata":"https://www.wikidata.org/wiki/Q190524","display_name":"Eigenvalues and eigenvectors","level":2,"score":0.5661169290542603},{"id":"https://openalex.org/C165464430","wikidata":"https://www.wikidata.org/wiki/Q1570441","display_name":"Parameterized complexity","level":2,"score":0.5625224709510803},{"id":"https://openalex.org/C114466953","wikidata":"https://www.wikidata.org/wiki/Q6034165","display_name":"Initialization","level":2,"score":0.5504067540168762},{"id":"https://openalex.org/C2777027219","wikidata":"https://www.wikidata.org/wiki/Q1284190","display_name":"Constant (computer programming)","level":2,"score":0.44998759031295776},{"id":"https://openalex.org/C90119067","wikidata":"https://www.wikidata.org/wiki/Q43260","display_name":"Polynomial","level":2,"score":0.4490073323249817},{"id":"https://openalex.org/C34388435","wikidata":"https://www.wikidata.org/wiki/Q2267362","display_name":"Bounded function","level":2,"score":0.4150528013706207},{"id":"https://openalex.org/C118615104","wikidata":"https://www.wikidata.org/wiki/Q121416","display_name":"Discrete mathematics","level":1,"score":0.37012267112731934},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.22363528609275818},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.20355165004730225},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.15553224086761475},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.10933828353881836},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.08904767036437988},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":6,"locations":[{"id":"doi:10.1162/neco_a_01164","is_oa":true,"landing_page_url":"https://doi.org/10.1162/neco_a_01164","pdf_url":"https://www.mitpressjournals.org/doi/pdf/10.1162/neco_a_01164","source":{"id":"https://openalex.org/S207023548","display_name":"Neural Computation","issn_l":"0899-7667","issn":["0899-7667","1530-888X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310315718","host_organization_name":"The MIT Press","host_organization_lineage":["https://openalex.org/P4310315718"],"host_organization_lineage_names":["The MIT Press"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Neural Computation","raw_type":"journal-article"},{"id":"pmid:30645179","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/30645179","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Neural computation","raw_type":null},{"id":"pmh:oai:arXiv.org:1802.06093","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1802.06093","pdf_url":"https://arxiv.org/pdf/1802.06093","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:2788909682","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/1802.06093v4","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.1802.06093","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1802.06093","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"},{"id":"mag:2964162902","is_oa":false,"landing_page_url":"http://proceedings.mlr.press/v80/bartlett18a/bartlett18a.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306419644","display_name":"International Conference on Machine Learning","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":"International Conference on Machine Learning","raw_type":null}],"best_oa_location":{"id":"doi:10.1162/neco_a_01164","is_oa":true,"landing_page_url":"https://doi.org/10.1162/neco_a_01164","pdf_url":"https://www.mitpressjournals.org/doi/pdf/10.1162/neco_a_01164","source":{"id":"https://openalex.org/S207023548","display_name":"Neural Computation","issn_l":"0899-7667","issn":["0899-7667","1530-888X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310315718","host_organization_name":"The MIT Press","host_organization_lineage":["https://openalex.org/P4310315718"],"host_organization_lineage_names":["The MIT Press"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Neural Computation","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":7,"referenced_works":["https://openalex.org/W2007454648","https://openalex.org/W2028461624","https://openalex.org/W2135704410","https://openalex.org/W2194775991","https://openalex.org/W4240805545","https://openalex.org/W4250589301","https://openalex.org/W4300029251"],"related_works":["https://openalex.org/W2971497379","https://openalex.org/W2201865757","https://openalex.org/W3083898667","https://openalex.org/W3177484304","https://openalex.org/W2722983786","https://openalex.org/W3115973547","https://openalex.org/W3091531185","https://openalex.org/W2298801569","https://openalex.org/W3170692334","https://openalex.org/W2964031251","https://openalex.org/W2800987155","https://openalex.org/W2806105434","https://openalex.org/W3093641160","https://openalex.org/W3042003204","https://openalex.org/W2092157735","https://openalex.org/W3030399983","https://openalex.org/W3094444964","https://openalex.org/W2766533971","https://openalex.org/W2903893215","https://openalex.org/W2763062829"],"abstract_inverted_index":{"We":[0,44,69,108,241],"analyze":[1,242],"algorithms":[2,47,221],"for":[3,78,117,245,256,279],"approximating":[4],"a":[5,27,104,127,175,211,218],"function":[6,28],"[Formula:":[7,11,15,29,35,41,86,96,118,148,162,167,171,181,188,192,204,249,253,258,276,281,289],"see":[8,12,16,30,36,42,87,97,119,149,163,168,172,182,189,193,205,250,254,259,277,282,290],"text]":[9,13,17,31,37,98,120,150,164,173,206,251,255,260,278,283,291],"mapping":[10],"to":[14,81,115,239],"using":[18,174],"deep":[19],"linear":[20],"neural":[21],"networks,":[22],"that":[23,25,48,61,111,133,157,160,199,222,248,272,287,293],"is,":[24],"learn":[26,49],"parameterized":[32],"by":[33,40,103],"matrices":[34],"and":[38,130,191,209,229,284],"defined":[39],"text].":[43,194],"focus":[45],"on":[46,53,73],"through":[50],"gradient":[51,79,112,224],"descent":[52,80,113,225],"the":[54,59,62,65,74,83,90,93,124,139,184,201,233,246,274,285,296],"population":[55],"quadratic":[56],"loss":[57,101],"in":[58,89,141,180,235],"case":[60,91,247],"distribution":[63],"over":[64],"inputs":[66],"is":[67,126,151,207],"isotropic.":[68],"provide":[70],"polynomial":[71,179],"bounds":[72],"number":[75,176,186],"of":[76,136,170,177,187,217,220],"iterations":[77],"approximate":[82],"least-squares":[84,202],"matrix":[85,203],"text],":[88,183,190],"where":[92],"initial":[94],"hypothesis":[95],"has":[99,210],"excess":[100],"bounded":[102],"small":[105],"enough":[106],"constant.":[107],"also":[109],"show":[110,132,156,198],"fails":[114],"converge":[116],"whose":[121],"distance":[122],"from":[123],"identity":[125,140,227,234],"larger":[128],"constant,":[129],"we":[131,155,197],"some":[134],"forms":[135],"regularization":[137],"toward":[138,232],"each":[142,236],"layer":[143],"do":[144],"not":[145,263],"help.":[146],"If":[147],"symmetric":[152,208],"positive":[153],"definite,":[154],"an":[158,166,243],"algorithm":[159,244,267],"initializes":[161],"learns":[165],"text]-approximation":[169],"updates":[178],"condition":[185],"In":[195],"contrast,":[196],"if":[200],"negative":[212],"eigenvalue,":[213],"then":[214],"all":[215,257,280],"members":[216],"class":[219],"perform":[223],"with":[226],"initialization,":[228],"optionally":[230],"regularize":[231],"layer,":[237],"fail":[238],"converge.":[240],"satisfies":[252],"but":[261],"may":[262],"be":[264],"symmetric.":[265],"This":[266],"uses":[268],"two":[269],"regularizers:":[270],"one":[271],"maintains":[273],"invariant":[275],"other":[286],"\u201cbalances\u201d":[288],"so":[292],"they":[294],"have":[295],"same":[297],"singular":[298],"values.":[299]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
