{"id":"https://openalex.org/W2991010949","doi":"https://doi.org/10.1109/tit.2021.3065212","title":"Benefits of Jointly Training Autoencoders: An Improved Neural Tangent Kernel Analysis","display_name":"Benefits of Jointly Training Autoencoders: An Improved Neural Tangent Kernel Analysis","publication_year":2021,"publication_date":"2021-03-10","ids":{"openalex":"https://openalex.org/W2991010949","doi":"https://doi.org/10.1109/tit.2021.3065212","mag":"2991010949"},"language":"en","primary_location":{"id":"doi:10.1109/tit.2021.3065212","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tit.2021.3065212","pdf_url":null,"source":{"id":"https://openalex.org/S4502562","display_name":"IEEE Transactions on Information Theory","issn_l":"0018-9448","issn":["0018-9448","1557-9654"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Information Theory","raw_type":"journal-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1911.11983","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5029385096","display_name":"Thanh V. Nguyen","orcid":"https://orcid.org/0000-0003-1576-5420"},"institutions":[{"id":"https://openalex.org/I173911158","display_name":"Iowa State University","ror":"https://ror.org/04rswrd78","country_code":"US","type":"education","lineage":["https://openalex.org/I173911158"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Thanh V. Nguyen","raw_affiliation_strings":["Department of Electrical and Computer Engineering (ECE), Iowa State University, Ames, IA, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering (ECE), Iowa State University, Ames, IA, USA","institution_ids":["https://openalex.org/I173911158"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Raymond K. W. Wong","orcid":"https://orcid.org/0000-0001-9342-3755"},"institutions":[{"id":"https://openalex.org/I91045830","display_name":"Texas A&M University","ror":"https://ror.org/01f5ytq51","country_code":"US","type":"education","lineage":["https://openalex.org/I91045830"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Raymond K. W. Wong","raw_affiliation_strings":["Department of Statistics, Texas A&M University, College Station, TX, USA"],"affiliations":[{"raw_affiliation_string":"Department of Statistics, Texas A&M University, College Station, TX, USA","institution_ids":["https://openalex.org/I91045830"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5066142047","display_name":"Chinmay Hegde","orcid":"https://orcid.org/0000-0003-4574-8066"},"institutions":[{"id":"https://openalex.org/I57206974","display_name":"New York University","ror":"https://ror.org/0190ak572","country_code":"US","type":"education","lineage":["https://openalex.org/I57206974"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chinmay Hegde","raw_affiliation_strings":["Tandon School of Engineering, New York University, New York City, NY, USA","[Tandon School of Engineering, New York University, New York City, NY, USA]"],"affiliations":[{"raw_affiliation_string":"Tandon School of Engineering, New York University, New York City, NY, USA","institution_ids":["https://openalex.org/I57206974"]},{"raw_affiliation_string":"[Tandon School of Engineering, New York University, New York City, NY, USA]","institution_ids":["https://openalex.org/I57206974"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5029385096"],"corresponding_institution_ids":["https://openalex.org/I173911158"],"apc_list":null,"apc_paid":null,"fwci":0.2799,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.60913929,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"67","issue":"7","first_page":"4669","last_page":"4692"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12676","display_name":"Machine Learning and ELM","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.7985949516296387},{"id":"https://openalex.org/keywords/parameterized-complexity","display_name":"Parameterized complexity","score":0.7537292242050171},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7076115012168884},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.6689056158065796},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.6219152212142944},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6190312504768372},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.546572208404541},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.5146995186805725},{"id":"https://openalex.org/keywords/gradient-descent","display_name":"Gradient descent","score":0.5061826109886169},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.4804684519767761},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.44630545377731323},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.43981289863586426},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.31792259216308594},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.21517333388328552}],"concepts":[{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.7985949516296387},{"id":"https://openalex.org/C165464430","wikidata":"https://www.wikidata.org/wiki/Q1570441","display_name":"Parameterized complexity","level":2,"score":0.7537292242050171},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7076115012168884},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.6689056158065796},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.6219152212142944},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6190312504768372},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.546572208404541},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.5146995186805725},{"id":"https://openalex.org/C153258448","wikidata":"https://www.wikidata.org/wiki/Q1199743","display_name":"Gradient descent","level":3,"score":0.5061826109886169},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.4804684519767761},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.44630545377731323},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.43981289863586426},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.31792259216308594},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.21517333388328552},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/tit.2021.3065212","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tit.2021.3065212","pdf_url":null,"source":{"id":"https://openalex.org/S4502562","display_name":"IEEE Transactions on Information Theory","issn_l":"0018-9448","issn":["0018-9448","1557-9654"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Information Theory","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:1911.11983","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1911.11983","pdf_url":"https://arxiv.org/pdf/1911.11983","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:2991010949","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/1911.11983.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.1911.11983","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1911.11983","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1911.11983","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1911.11983","pdf_url":"https://arxiv.org/pdf/1911.11983","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1271002188","display_name":null,"funder_award_id":"DMS-1612985/1806063","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G6207593860","display_name":null,"funder_award_id":"CCF-1815101","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G6695499138","display_name":null,"funder_award_id":"CCF-1934904","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G7306339223","display_name":null,"funder_award_id":"CAREER CCF-2005804","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":48,"referenced_works":["https://openalex.org/W2105464873","https://openalex.org/W2107411554","https://openalex.org/W2146758737","https://openalex.org/W2765614010","https://openalex.org/W2769458173","https://openalex.org/W2809090039","https://openalex.org/W2886067286","https://openalex.org/W2892999459","https://openalex.org/W2894604724","https://openalex.org/W2899748887","https://openalex.org/W2900959181","https://openalex.org/W2911279176","https://openalex.org/W2911610187","https://openalex.org/W2911867426","https://openalex.org/W2913892099","https://openalex.org/W2920270464","https://openalex.org/W2922033872","https://openalex.org/W2942052807","https://openalex.org/W2948046738","https://openalex.org/W2949978219","https://openalex.org/W2950987997","https://openalex.org/W2954474386","https://openalex.org/W2962880345","https://openalex.org/W2963571266","https://openalex.org/W2970217468","https://openalex.org/W3021189130","https://openalex.org/W3035495594","https://openalex.org/W3098045837","https://openalex.org/W6675834492","https://openalex.org/W6681291508","https://openalex.org/W6737650911","https://openalex.org/W6739166439","https://openalex.org/W6745990031","https://openalex.org/W6746390323","https://openalex.org/W6752495264","https://openalex.org/W6753918066","https://openalex.org/W6755150206","https://openalex.org/W6756001544","https://openalex.org/W6756137178","https://openalex.org/W6758334222","https://openalex.org/W6758546984","https://openalex.org/W6758902322","https://openalex.org/W6760829302","https://openalex.org/W6761496057","https://openalex.org/W6763304541","https://openalex.org/W6763681727","https://openalex.org/W6779746691","https://openalex.org/W6948022635"],"related_works":["https://openalex.org/W3133313904","https://openalex.org/W2890732907","https://openalex.org/W3021189130","https://openalex.org/W2993367001","https://openalex.org/W2163321856","https://openalex.org/W2186887781","https://openalex.org/W2246853956","https://openalex.org/W2988133370","https://openalex.org/W3137422635","https://openalex.org/W3006518667","https://openalex.org/W2890311316","https://openalex.org/W3044287261","https://openalex.org/W1481935768","https://openalex.org/W3174185214","https://openalex.org/W2753753246","https://openalex.org/W2912848048","https://openalex.org/W3041900396","https://openalex.org/W2261232712","https://openalex.org/W2739997491","https://openalex.org/W3196579076"],"abstract_inverted_index":{"Deep":[0],"neural":[1,98],"networks":[2,99,185],"can":[3],"achieve":[4],"impressive":[5],"performance":[6],"in":[7,26,50,89,116,136,154,162,179],"the":[8,17,36,51,66,85,93,110,114,117,130,145,163,171,180],"regime":[9],"where":[10],"they":[11],"are":[12],"massively":[13],"over-parameterized.":[14],"Consequently,":[15],"over":[16,151],"past":[18],"year,":[19],"there":[20],"has":[21,54],"been":[22],"a":[23,122,159],"growing":[24],"interest":[25],"analyzing":[27],"optimization":[28],"and":[29,139,176],"generalization":[30],"properties":[31],"of":[32,38,48,69,113,133,148,166,173],"over-parameterized":[33,74,181],"networks.":[34],"However,":[35],"majority":[37],"existing":[39],"work":[40,91],"only":[41],"applies":[42],"to":[43,191],"supervised":[44],"learning.":[45],"The":[46],"role":[47],"over-parameterization":[49],"unsupervised":[52],"setting":[53],"by":[55],"contrast":[56],"gained":[57],"far":[58],"less":[59],"attention.":[60],"In":[61],"this":[62],"paper,":[63],"we":[64,127,169],"study":[65],"inductive":[67],"bias":[68],"gradient":[70,101,111,134],"descent":[71,102,135],"for":[72,84],"two-layer":[73],"autoencoders":[75,115,175],"with":[76],"ReLU":[77],"activation.":[78],"We":[79,107],"first":[80],"provide":[81],"theoretical":[82],"evidence":[83],"memorization":[86],"phenomena":[87],"observed":[88],"recent":[90],"using":[92],"property":[94],"that":[95,178],"infinitely":[96],"wide":[97],"under":[100],"evolve":[103],"as":[104],"linear":[105,131],"models.":[106],"also":[108],"analyze":[109,170],"dynamics":[112],"finite-width":[118],"setting.":[119],"Starting":[120],"from":[121,186],"randomly":[123,187],"initialized":[124,188],"autoencoder":[125],"network,":[126],"rigorously":[128],"prove":[129,177],"convergence":[132],"two":[137],"weakly-trained":[138],"jointly-trained":[140],"regimes.":[141],"Our":[142],"results":[143],"indicate":[144],"considerable":[146],"benefits":[147],"joint":[149],"training":[150,153,183],"weak":[152],"finding":[155],"global":[156],"optima,":[157],"achieving":[158],"dramatic":[160],"decrease":[161],"required":[164],"level":[165],"over-parameterization.":[167],"Finally,":[168],"case":[172],"weight-tied":[174],"setting,":[182],"such":[184],"points":[189],"leads":[190],"certain":[192],"unexpected":[193],"degeneracies.":[194]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2021,"cited_by_count":1}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
