{"id":"https://openalex.org/W7118582255","doi":"https://doi.org/10.48550/arxiv.2601.00455","title":"Deep Networks Learn Deep Hierarchical Models","display_name":"Deep Networks Learn Deep Hierarchical Models","publication_year":2026,"publication_date":"2026-01-01","ids":{"openalex":"https://openalex.org/W7118582255","doi":"https://doi.org/10.48550/arxiv.2601.00455"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2601.00455","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.00455","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2601.00455","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5049826794","display_name":"Amit Daniely","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Daniely, Amit","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5049826794"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.35929998755455017,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.35929998755455017,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.12250000238418579,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.08560000360012054,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/learnability","display_name":"Learnability","score":0.8406000137329102},{"id":"https://openalex.org/keywords/hierarchy","display_name":"Hierarchy","score":0.7170000076293945},{"id":"https://openalex.org/keywords/simple","display_name":"Simple (philosophy)","score":0.6712999939918518},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.6543999910354614},{"id":"https://openalex.org/keywords/limit","display_name":"Limit (mathematics)","score":0.531000018119812},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.486299991607666},{"id":"https://openalex.org/keywords/hierarchical-organization","display_name":"Hierarchical organization","score":0.3165999948978424},{"id":"https://openalex.org/keywords/concept-class","display_name":"Concept class","score":0.3124000132083893}],"concepts":[{"id":"https://openalex.org/C2777723229","wikidata":"https://www.wikidata.org/wiki/Q4367921","display_name":"Learnability","level":2,"score":0.8406000137329102},{"id":"https://openalex.org/C31170391","wikidata":"https://www.wikidata.org/wiki/Q188619","display_name":"Hierarchy","level":2,"score":0.7170000076293945},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.6712999939918518},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.6543999910354614},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6155999898910522},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5622000098228455},{"id":"https://openalex.org/C151201525","wikidata":"https://www.wikidata.org/wiki/Q177239","display_name":"Limit (mathematics)","level":2,"score":0.531000018119812},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.486299991607666},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3610999882221222},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.33869999647140503},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.320499986410141},{"id":"https://openalex.org/C2780217385","wikidata":"https://www.wikidata.org/wiki/Q2389284","display_name":"Hierarchical organization","level":2,"score":0.3165999948978424},{"id":"https://openalex.org/C203313322","wikidata":"https://www.wikidata.org/wiki/Q5158394","display_name":"Concept class","level":2,"score":0.3124000132083893},{"id":"https://openalex.org/C155512373","wikidata":"https://www.wikidata.org/wiki/Q287450","display_name":"Residual","level":2,"score":0.30550000071525574},{"id":"https://openalex.org/C64900535","wikidata":"https://www.wikidata.org/wiki/Q5753111","display_name":"Hierarchical network model","level":3,"score":0.3052999973297119},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.2872999906539917},{"id":"https://openalex.org/C90119067","wikidata":"https://www.wikidata.org/wiki/Q43260","display_name":"Polynomial","level":2,"score":0.28349998593330383},{"id":"https://openalex.org/C2781289151","wikidata":"https://www.wikidata.org/wiki/Q2903989","display_name":"Class hierarchy","level":3,"score":0.28200000524520874},{"id":"https://openalex.org/C144986985","wikidata":"https://www.wikidata.org/wiki/Q871236","display_name":"Hierarchical database model","level":2,"score":0.27230000495910645},{"id":"https://openalex.org/C2776608160","wikidata":"https://www.wikidata.org/wiki/Q4785462","display_name":"Natural (archaeology)","level":2,"score":0.2619999945163727},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.2547000050544739},{"id":"https://openalex.org/C28427503","wikidata":"https://www.wikidata.org/wiki/Q13580300","display_name":"Internal model","level":3,"score":0.2538999915122986}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2601.00455","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.00455","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2601.00455","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.00455","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.6588068008422852,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0,181],"consider":[1],"supervised":[2],"learning":[3,80,143],"with":[4],"$n$":[5],"labels":[6,44,58],"and":[7],"show":[8],"that":[9,71,85,102,120,147,157,186,205],"layerwise":[10],"SGD":[11],"on":[12],"residual":[13],"networks":[14],"can":[15,111],"efficiently":[16],"learn":[17],"a":[18,129,188,192,201],"class":[19,25,68,101],"of":[20,29,50,64,91,122,151,173,197],"hierarchical":[21,124,158,202],"models.":[22],"This":[23],"model":[24,190],"assumes":[26],"the":[27,51,83,88,148,155,174,179],"existence":[28,150],"an":[30],"(unknown)":[31],"label":[32],"hierarchy":[33],"$L_1":[34],"\\subseteq":[35,37,39],"L_2":[36],"\\dots":[38],"L_r":[40],"=":[41],"[n]$,":[42],"where":[43,141,191],"in":[45,59,82,99,187],"$L_1$":[46],"are":[47,61,97,160],"simple":[48,62],"functions":[49,63],"input,":[52],"while":[53],"for":[54,131,139],"$i":[55],"&gt;":[56],"1$,":[57],"$L_i$":[60],"simpler":[65],"labels.":[66],"Our":[67],"surpasses":[69],"models":[70,98,110,125],"were":[72],"previously":[73],"shown":[74],"to":[75,106],"be":[76,112],"learnable":[77],"by":[78,114,178],"deep":[79,133,142],"algorithms,":[81],"sense":[84],"it":[86],"reaches":[87],"depth":[89,105],"limit":[90],"efficient":[92,207],"learnability.":[93,208],"That":[94],"is,":[95],"there":[96],"this":[100,183],"require":[103],"polynomial":[104],"express,":[107],"whereas":[108],"previous":[109],"computed":[113],"log-depth":[115],"circuits.":[116],"Furthermore,":[117],"we":[118,145],"suggest":[119],"learnability":[121],"such":[123],"might":[126],"eventually":[127],"form":[128],"basis":[130],"understanding":[132],"learning.":[134],"Beyond":[135],"their":[136,198],"natural":[137],"fit":[138],"domains":[140],"excels,":[144],"argue":[146],"mere":[149],"human":[152],"``teachers\"":[153],"supports":[154],"hypothesis":[156],"structures":[159],"inherently":[161],"available.":[162],"By":[163],"providing":[164],"granular":[165],"labels,":[166],"teachers":[167],"effectively":[168],"reveal":[169],"``hints''":[170],"or":[171],"``snippets''":[172],"internal":[175,199],"algorithms":[176],"used":[177],"brain.":[180],"formalize":[182],"intuition,":[184],"showing":[185],"simplified":[189],"teacher":[193],"is":[194],"partially":[195],"aware":[196],"logic,":[200],"structure":[203],"emerges":[204],"facilitates":[206]},"counts_by_year":[],"updated_date":"2026-01-08T20:10:11.968330","created_date":"2026-01-08T00:00:00"}
