{"id":"https://openalex.org/W4410341587","doi":"https://doi.org/10.1109/tit.2025.3568697","title":"Information-Theoretic Generalization Bounds for Deep Neural Networks","display_name":"Information-Theoretic Generalization Bounds for Deep Neural Networks","publication_year":2025,"publication_date":"2025-05-13","ids":{"openalex":"https://openalex.org/W4410341587","doi":"https://doi.org/10.1109/tit.2025.3568697"},"language":"en","primary_location":{"id":"doi:10.1109/tit.2025.3568697","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tit.2025.3568697","pdf_url":null,"source":{"id":"https://openalex.org/S4502562","display_name":"IEEE Transactions on Information Theory","issn_l":"0018-9448","issn":["0018-9448","1557-9654"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Information Theory","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5073481627","display_name":"Haiyun He","orcid":"https://orcid.org/0000-0002-1797-6101"},"institutions":[{"id":"https://openalex.org/I205783295","display_name":"Cornell University","ror":"https://ror.org/05bnh6r87","country_code":"US","type":"education","lineage":["https://openalex.org/I205783295"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Haiyun He","raw_affiliation_strings":["Center for Applied Mathematics, Cornell University, Ithaca, NY, USA","Center for Applied Mathematics, USA"],"raw_orcid":"https://orcid.org/0000-0002-1797-6101","affiliations":[{"raw_affiliation_string":"Center for Applied Mathematics, Cornell University, Ithaca, NY, USA","institution_ids":["https://openalex.org/I205783295"]},{"raw_affiliation_string":"Center for Applied Mathematics, USA","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5071112095","display_name":"Ziv Goldfeld","orcid":"https://orcid.org/0000-0003-3406-3950"},"institutions":[{"id":"https://openalex.org/I205783295","display_name":"Cornell University","ror":"https://ror.org/05bnh6r87","country_code":"US","type":"education","lineage":["https://openalex.org/I205783295"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ziv Goldfeld","raw_affiliation_strings":["School of Electrical and Computer Engineering, Cornell University, Ithaca, NY, USA"],"raw_orcid":"https://orcid.org/0000-0003-3406-3950","affiliations":[{"raw_affiliation_string":"School of Electrical and Computer Engineering, Cornell University, Ithaca, NY, USA","institution_ids":["https://openalex.org/I205783295"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5073481627"],"corresponding_institution_ids":["https://openalex.org/I205783295"],"apc_list":null,"apc_paid":null,"fwci":4.3465,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.93821457,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":98},"biblio":{"volume":"71","issue":"8","first_page":"6227","last_page":"6247"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.9751999974250793,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.9751999974250793,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.691057562828064},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.594914436340332},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5592073202133179},{"id":"https://openalex.org/keywords/information-theory","display_name":"Information theory","score":0.49279335141181946},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4749210774898529},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.38752666115760803},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.32111936807632446},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.13034453988075256}],"concepts":[{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.691057562828064},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.594914436340332},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5592073202133179},{"id":"https://openalex.org/C52622258","wikidata":"https://www.wikidata.org/wiki/Q131222","display_name":"Information theory","level":2,"score":0.49279335141181946},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4749210774898529},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.38752666115760803},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.32111936807632446},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.13034453988075256},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tit.2025.3568697","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tit.2025.3568697","pdf_url":null,"source":{"id":"https://openalex.org/S4502562","display_name":"IEEE Transactions on Information Theory","issn_l":"0018-9448","issn":["0018-9448","1557-9654"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Information Theory","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1200192541","display_name":null,"funder_award_id":"1955997","funder_id":"https://openalex.org/F4320337388","funder_display_name":"Division of Computer and Network Systems"},{"id":"https://openalex.org/G1602460655","display_name":null,"funder_award_id":"2210368","funder_id":"https://openalex.org/F4320337380","funder_display_name":"Division of Mathematical Sciences"},{"id":"https://openalex.org/G2492197981","display_name":null,"funder_award_id":"2046018","funder_id":"https://openalex.org/F4320337387","funder_display_name":"Division of Computing and Communication Foundations"},{"id":"https://openalex.org/G3015205638","display_name":null,"funder_award_id":"FA9550-23-1-0301","funder_id":"https://openalex.org/F4320338279","funder_display_name":"Air Force Office of Scientific Research"},{"id":"https://openalex.org/G7456902319","display_name":null,"funder_award_id":"2308446","funder_id":"https://openalex.org/F4320337387","funder_display_name":"Division of Computing and Communication Foundations"}],"funders":[{"id":"https://openalex.org/F4320307102","display_name":"Intel Corporation","ror":"https://ror.org/01ek73717"},{"id":"https://openalex.org/F4320309624","display_name":"Cornell University","ror":"https://ror.org/05bnh6r87"},{"id":"https://openalex.org/F4320337380","display_name":"Division of Mathematical Sciences","ror":"https://ror.org/051fftw81"},{"id":"https://openalex.org/F4320337387","display_name":"Division of Computing and Communication Foundations","ror":"https://ror.org/01mng8331"},{"id":"https://openalex.org/F4320337388","display_name":"Division of Computer and Network Systems","ror":"https://ror.org/02rdzmk74"},{"id":"https://openalex.org/F4320338279","display_name":"Air Force Office of Scientific Research","ror":"https://ror.org/011e9bt93"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":69,"referenced_works":["https://openalex.org/W1585160083","https://openalex.org/W2014384147","https://openalex.org/W2029029543","https://openalex.org/W2081231767","https://openalex.org/W2099111195","https://openalex.org/W2111406701","https://openalex.org/W2129774678","https://openalex.org/W2194775991","https://openalex.org/W2290247654","https://openalex.org/W2541283300","https://openalex.org/W2566079294","https://openalex.org/W2752796333","https://openalex.org/W2880214242","https://openalex.org/W2887344700","https://openalex.org/W2910572885","https://openalex.org/W2910589063","https://openalex.org/W2912811302","https://openalex.org/W2951714068","https://openalex.org/W2962702650","https://openalex.org/W2963038205","https://openalex.org/W2976533594","https://openalex.org/W2979452771","https://openalex.org/W2996320484","https://openalex.org/W3008527562","https://openalex.org/W3022414928","https://openalex.org/W3123190877","https://openalex.org/W3170524081","https://openalex.org/W3198746530","https://openalex.org/W3214845324","https://openalex.org/W4230410498","https://openalex.org/W4310348270","https://openalex.org/W4377941395","https://openalex.org/W4395447967","https://openalex.org/W6600213771","https://openalex.org/W6674330103","https://openalex.org/W6678917918","https://openalex.org/W6684809622","https://openalex.org/W6692956712","https://openalex.org/W6726983090","https://openalex.org/W6733862737","https://openalex.org/W6734079340","https://openalex.org/W6735544424","https://openalex.org/W6736583452","https://openalex.org/W6738074204","https://openalex.org/W6740483536","https://openalex.org/W6741194474","https://openalex.org/W6741653254","https://openalex.org/W6745276634","https://openalex.org/W6745558287","https://openalex.org/W6745751660","https://openalex.org/W6748600614","https://openalex.org/W6751754507","https://openalex.org/W6754984521","https://openalex.org/W6756188728","https://openalex.org/W6759423170","https://openalex.org/W6763000687","https://openalex.org/W6766923583","https://openalex.org/W6772932235","https://openalex.org/W6775928558","https://openalex.org/W6782591231","https://openalex.org/W6786128562","https://openalex.org/W6791199311","https://openalex.org/W6801582282","https://openalex.org/W6802511957","https://openalex.org/W6804346497","https://openalex.org/W6810293309","https://openalex.org/W6810536043","https://openalex.org/W6811242939","https://openalex.org/W6840734946"],"related_works":["https://openalex.org/W3162204513","https://openalex.org/W2371138613","https://openalex.org/W2048963458","https://openalex.org/W43109613","https://openalex.org/W2359952343","https://openalex.org/W2239445980","https://openalex.org/W2080152487","https://openalex.org/W3083152911","https://openalex.org/W3022347918","https://openalex.org/W4200527723"],"abstract_inverted_index":{"Deep":[0],"neural":[1],"networks":[2],"(DNNs)":[3],"exhibit":[4],"an":[5],"exceptional":[6],"capacity":[7],"for":[8,24,97],"generalization":[9,29,39,87,154],"in":[10,41,191],"practical":[11],"applications.":[12],"This":[13,150],"work":[14],"aims":[15],"to":[16,156,171],"capture":[17,157],"the":[18,38,44,49,53,59,69,74,78,103,114,117,125,129,158,164,179],"effect":[19],"and":[20,55,146,178],"benefits":[21],"of":[22,43,58,80,105,116,139,163],"depth":[23],"supervised":[25],"learning":[26],"via":[27],"information-theoretic":[28],"bounds.":[30],"We":[31],"first":[32],"derive":[33],"two":[34],"hierarchical":[35],"bounds":[36,99,155],"on":[37],"error":[40],"terms":[42],"Kullback-Leibler":[45],"(KL)":[46],"divergence":[47,65],"or":[48],"1-Wasserstein":[50,93],"distance":[51],"between":[52,136],"train":[54],"test":[56],"distributions":[57],"network":[60,165,187],"internal":[61],"representations.":[62],"The":[63],"KL":[64],"bound":[66,76],"shrinks":[67],"as":[68,85,160],"layer":[70,82],"index":[71],"increases,":[72],"while":[73],"Wasserstein":[75],"implies":[77],"existence":[79],"a":[81,86,91,161,174,201],"that":[83,183],"serves":[84],"funnel,":[88],"which":[89],"attains":[90],"minimal":[92],"distance.":[94],"Analytic":[95],"expressions":[96],"both":[98],"are":[100],"derived":[101],"under":[102],"setting":[104],"binary":[106],"Gaussian":[107,147],"classification":[108],"with":[109,173],"linear":[110],"DNNs.":[111],"To":[112],"quantify":[113],"contraction":[115,159],"relevant":[118],"information":[119],"measures":[120],"when":[121],"moving":[122],"deeper":[123,184],"into":[124],"network,":[126],"we":[127],"analyze":[128],"strong":[130],"data":[131],"processing":[132],"inequality":[133],"(SDPI)":[134],"coefficient":[135],"consecutive":[137],"layers":[138],"three":[140],"regularized":[141],"DNN":[142],"models:":[143],"Dropout,":[144],"DropConnect,":[145],"noise":[148],"injection.":[149],"enables":[151],"refining":[152],"our":[153,169],"function":[162],"architecture":[166],"parameters.":[167],"Specializing":[168],"results":[170],"DNNs":[172],"finite":[175],"parameter":[176],"space":[177],"Gibbs":[180],"algorithm":[181],"reveals":[182],"yet":[185],"narrower":[186],"architectures":[188],"generalize":[189],"better":[190],"those":[192],"examples,":[193],"although":[194],"how":[195],"broadly":[196],"this":[197],"statement":[198],"applies":[199],"remains":[200],"question.":[202]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
