{"id":"https://openalex.org/W4399115839","doi":"https://doi.org/10.48550/arxiv.2405.16639","title":"A direct proof of a unified law of robustness for Bregman divergence losses","display_name":"A direct proof of a unified law of robustness for Bregman divergence losses","publication_year":2024,"publication_date":"2024-05-26","ids":{"openalex":"https://openalex.org/W4399115839","doi":"https://doi.org/10.48550/arxiv.2405.16639"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2405.16639","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2405.16639","pdf_url":"https://arxiv.org/pdf/2405.16639","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":null},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2405.16639","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5071688825","display_name":"Santanu Das","orcid":"https://orcid.org/0000-0002-0147-449X"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Das, Santanu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059145110","display_name":"Jatin Batra","orcid":"https://orcid.org/0000-0002-7174-9778"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Batra, Jatin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5101900831","display_name":"Piyush Srivastava","orcid":"https://orcid.org/0000-0003-0953-2890"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Srivastava, Piyush","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5071688825"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11871","display_name":"Advanced Statistical Methods and Models","score":0.9872000217437744,"subfield":{"id":"https://openalex.org/subfields/2613","display_name":"Statistics and Probability"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11871","display_name":"Advanced Statistical Methods and Models","score":0.9872000217437744,"subfield":{"id":"https://openalex.org/subfields/2613","display_name":"Statistics and Probability"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10968","display_name":"Statistical Distribution Estimation and Applications","score":0.9677000045776367,"subfield":{"id":"https://openalex.org/subfields/2613","display_name":"Statistics and Probability"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11443","display_name":"Advanced Statistical Process Monitoring","score":0.9373000264167786,"subfield":{"id":"https://openalex.org/subfields/1804","display_name":"Statistics, Probability and Uncertainty"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.8440612554550171},{"id":"https://openalex.org/keywords/bregman-divergence","display_name":"Bregman divergence","score":0.7847532033920288},{"id":"https://openalex.org/keywords/divergence","display_name":"Divergence (linguistics)","score":0.6127643585205078},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.41779541969299316},{"id":"https://openalex.org/keywords/applied-mathematics","display_name":"Applied mathematics","score":0.3841134011745453},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.358612060546875},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.32106760144233704},{"id":"https://openalex.org/keywords/philosophy","display_name":"Philosophy","score":0.13466107845306396}],"concepts":[{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.8440612554550171},{"id":"https://openalex.org/C149073432","wikidata":"https://www.wikidata.org/wiki/Q4960382","display_name":"Bregman divergence","level":2,"score":0.7847532033920288},{"id":"https://openalex.org/C207390915","wikidata":"https://www.wikidata.org/wiki/Q1230525","display_name":"Divergence (linguistics)","level":2,"score":0.6127643585205078},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.41779541969299316},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.3841134011745453},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.358612060546875},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.32106760144233704},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.13466107845306396},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2405.16639","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2405.16639","pdf_url":"https://arxiv.org/pdf/2405.16639","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":null},{"id":"doi:10.48550/arxiv.2405.16639","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2405.16639","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2405.16639","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2405.16639","pdf_url":"https://arxiv.org/pdf/2405.16639","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":null},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4459151492","display_name":null,"funder_award_id":"RTI4001","funder_id":"https://openalex.org/F4320320719","funder_display_name":"Department of Science and Technology, Ministry of Science and Technology, India"}],"funders":[{"id":"https://openalex.org/F4320307786","display_name":"Adobe Systems","ror":"https://ror.org/059tvcg64"},{"id":"https://openalex.org/F4320320715","display_name":"Tata Institute of Fundamental Research","ror":"https://ror.org/03ht1xw27"},{"id":"https://openalex.org/F4320320719","display_name":"Department of Science and Technology, Ministry of Science and Technology, India","ror":"https://ror.org/0101xrq71"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4290793960","https://openalex.org/W2895052949","https://openalex.org/W2804257782","https://openalex.org/W4299147440","https://openalex.org/W4304099439","https://openalex.org/W2885258126","https://openalex.org/W2213789179","https://openalex.org/W4295845550","https://openalex.org/W4309585996","https://openalex.org/W4309651314"],"abstract_inverted_index":{"In":[0,51,184],"contemporary":[1],"deep":[2],"learning":[3],"practice,":[4],"models":[5],"are":[6,237],"often":[7],"trained":[8],"to":[9,14,48,70,130,138,154,171,213],"near":[10],"zero":[11],"loss":[12,76,80,142],"i.e.":[13,123],"nearly":[15],"interpolate":[16,71],"the":[17,21,26,33,39,66,73,79,82,86,89,125,162,178,189,222,231,246,250,254,257,266,279],"training":[18,75],"data.":[19],"However,":[20],"number":[22,34],"of":[23,35,55,64,81,85,94,101,110,112,180,193,199,224,241,253,278,283],"parameters":[24],"in":[25,177,197],"model":[27,67],"is":[28,68,118,128,249],"usually":[29],"far":[30],"more":[31,172,275],"than":[32],"data":[36],"points":[37],"n,":[38],"theoretical":[40],"minimum":[41],"needed":[42],"for":[43,97,120,218,244],"interpolation:":[44,65],"a":[45,61,98,107,144,152,200,211,238,274],"phenomenon":[46],"referred":[47],"as":[49,161,227,265],"overparameterization.":[50],"an":[52],"interesting":[53],"piece":[54],"work,":[56,186],"Bubeck":[57,194,284],"and":[58,96,157,195,204,259,285,287],"Sellke":[59,196,286],"considered":[60],"natural":[62,108,239],"notion":[63,93,109],"said":[69],"when":[72],"model's":[74],"goes":[77],"below":[78],"conditional":[83,251],"expectation":[84,252],"response":[87,182,255],"given":[88,256],"covariate.":[90],"For":[91],"this":[92,185,207],"interpolation":[95,122],"broad":[99,290],"class":[100,240],"covariate":[102],"distributions":[103],"(specifically":[104],"those":[105],"satisfying":[106],"concentration":[111],"measure),":[113],"they":[114,148],"showed":[115],"that":[116,150,206],"overparameterization":[117],"necessary":[119],"robust":[121],"if":[124],"interpolating":[126],"function":[127],"required":[129],"be":[131,169],"Lipschitz.":[132],"Their":[133],"main":[134,280],"proof":[135,191,281],"technique":[136,192,282],"applies":[137],"regression":[139],"with":[140],"square":[141],"against":[143],"scalar":[145,181],"response,":[146],"but":[147],"remark":[149],"via":[151],"connection":[153],"Rademacher":[155,228],"complexity":[156,229],"using":[158],"tools":[159,225],"such":[160,226,264],"Ledoux-Talagrand":[163,232],"contraction":[164,233],"inequality,":[165],"their":[166],"result":[167],"can":[168],"extended":[170],"general":[173,276],"losses,":[174],"at":[175],"least":[176],"case":[179],"variables.":[183],"we":[187],"recast":[188],"original":[190],"terms":[198],"bias-variance":[201],"type":[202],"decomposition,":[203],"show":[205],"view":[208],"directly":[209],"unlocks":[210],"generalization":[212],"Bregman":[214,235],"divergence":[215],"losses":[216,242,263],"(even":[217],"vector-valued":[219],"responses),":[220],"without":[221],"use":[223],"or":[230],"principle.":[234],"divergences":[236],"since":[243],"these,":[245],"best":[247],"estimator":[248],"covariate,":[258],"include":[260],"other":[261],"practical":[262],"cross":[267],"entropy":[268],"loss.":[269],"Our":[270],"work":[271],"thus":[272],"gives":[273],"understanding":[277],"demonstrates":[288],"its":[289],"utility.":[291]},"counts_by_year":[],"updated_date":"2026-05-18T08:16:58.900851","created_date":"2024-05-29T00:00:00"}
