{"id":"https://openalex.org/W7101912637","doi":"https://doi.org/10.48550/arxiv.2510.23323","title":"Towards Scaling Deep Neural Networks with Predictive Coding: Theory and Practice","display_name":"Towards Scaling Deep Neural Networks with Predictive Coding: Theory and Practice","publication_year":2025,"publication_date":"2025-10-24","ids":{"openalex":"https://openalex.org/W7101912637","doi":"https://doi.org/10.48550/arxiv.2510.23323"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2510.23323","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2510.23323","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2510.23323","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Innocenti, Francesco","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Innocenti, Francesco","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.17949999868869781,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.17949999868869781,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.13130000233650208,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.058800000697374344,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.7141000032424927},{"id":"https://openalex.org/keywords/backpropagation","display_name":"Backpropagation","score":0.6053000092506409},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5422999858856201},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.5069000124931335},{"id":"https://openalex.org/keywords/predictive-coding","display_name":"Predictive coding","score":0.4009999930858612},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.3513999879360199},{"id":"https://openalex.org/keywords/feed-forward","display_name":"Feed forward","score":0.3359000086784363},{"id":"https://openalex.org/keywords/supervised-learning","display_name":"Supervised learning","score":0.32580000162124634},{"id":"https://openalex.org/keywords/scaling","display_name":"Scaling","score":0.3237999975681305}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7208999991416931},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.7141000032424927},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.667900025844574},{"id":"https://openalex.org/C155032097","wikidata":"https://www.wikidata.org/wiki/Q798503","display_name":"Backpropagation","level":3,"score":0.6053000092506409},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5422999858856201},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.5069000124931335},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.48750001192092896},{"id":"https://openalex.org/C2778061373","wikidata":"https://www.wikidata.org/wiki/Q1315146","display_name":"Predictive coding","level":3,"score":0.4009999930858612},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.3513999879360199},{"id":"https://openalex.org/C38858127","wikidata":"https://www.wikidata.org/wiki/Q5441228","display_name":"Feed forward","level":2,"score":0.3359000086784363},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.32580000162124634},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.3237999975681305},{"id":"https://openalex.org/C117619785","wikidata":"https://www.wikidata.org/wiki/Q6094414","display_name":"Iterative learning control","level":3,"score":0.3224000036716461},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.30889999866485596},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.30239999294281006},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.30090001225471497},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.290800005197525},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.28870001435279846},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.28139999508857727},{"id":"https://openalex.org/C77405623","wikidata":"https://www.wikidata.org/wiki/Q598451","display_name":"System dynamics","level":2,"score":0.2732999920845032},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.265500009059906},{"id":"https://openalex.org/C47702885","wikidata":"https://www.wikidata.org/wiki/Q5441227","display_name":"Feedforward neural network","level":3,"score":0.26510000228881836},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2628999948501587},{"id":"https://openalex.org/C123757187","wikidata":"https://www.wikidata.org/wiki/Q9195957","display_name":"Network dynamics","level":2,"score":0.2624000012874603},{"id":"https://openalex.org/C159694833","wikidata":"https://www.wikidata.org/wiki/Q2321565","display_name":"Iterative method","level":2,"score":0.26080000400543213},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2563000023365021}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2510.23323","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2510.23323","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2510.23323","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2510.23323","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Backpropagation":[0],"(BP)":[1],"is":[2,23,185,270],"the":[3,8,32,94,132,178,195,207,221,249,258],"standard":[4],"algorithm":[5,43],"for":[6,175,220,260],"training":[7,226],"deep":[9,106],"neural":[10],"networks":[11,51,177,230],"that":[12,70,131,162,174],"power":[13],"modern":[14],"artificial":[15],"intelligence":[16],"including":[17],"large":[18],"language":[19],"models.":[20],"However,":[21,86],"BP":[22,274],"energy":[24],"inefficient":[25],"and":[26,96,105,189,234,251],"unlikely":[27],"to":[28,191,263,271],"be":[29,138],"implemented":[30],"by":[31,55,119,203],"brain.":[33],"This":[34],"thesis":[35,242],"studies":[36],"an":[37,141],"alternative,":[38],"potentially":[39],"more":[40,187],"efficient":[41],"brain-inspired":[42],"called":[44,217],"predictive":[45],"coding":[46],"(PC).":[47],"Unlike":[48],"BP,":[49,81],"PC":[50,136,163,183,269],"(PCNs)":[52],"perform":[53],"inference":[54,73,95,208,250],"iterative":[56,72],"equilibration":[57],"of":[58,78,99,135,169,206,210,227,248,254],"neuron":[59],"activities":[60],"before":[61],"learning":[62,97,133,252],"or":[63],"weight":[64],"updates.":[65,154],"Recent":[66],"work":[67],"has":[68],"suggested":[69],"this":[71,158,241],"procedure":[74],"provides":[75],"a":[76,121,204,214],"range":[77],"benefits":[79],"over":[80],"such":[82,173],"as":[83,140],"faster":[84],"training.":[85],"these":[87],"advantages":[88],"have":[89],"not":[90],"been":[91],"consistently":[92],"observed,":[93],"dynamics":[98,134,209,253],"PCNs":[100,107,118],"are":[101],"still":[102],"poorly":[103],"understood,":[104],"remain":[108],"practically":[109],"untrainable.":[110],"Here,":[111],"we":[112,129,160,212],"make":[113,167],"significant":[114],"progress":[115],"towards":[116],"scaling":[117],"taking":[120],"theoretical":[122],"approach":[123],"grounded":[124],"in":[125,165],"optimisation":[126],"theory.":[127],"First,":[128],"show":[130,161],"can":[137,164],"understood":[139],"approximate":[142],"trust-region":[143],"method":[144],"using":[145,150],"second-order":[146],"information,":[147,172],"despite":[148],"explicitly":[149],"only":[151],"first-order":[152],"local":[153],"Second,":[155],"going":[156],"beyond":[157],"approximation,":[159],"principle":[166],"use":[168],"arbitrarily":[170],"higher-order":[171],"feedforward":[176],"effective":[179],"landscape":[180],"on":[181,237,265],"which":[182,219],"learns":[184],"far":[186],"benign":[188],"robust":[190],"vanishing":[192],"gradients":[193],"than":[194],"(mean":[196],"squared":[197],"error)":[198],"loss":[199],"landscape.":[200],"Third,":[201],"motivated":[202],"study":[205],"PCNs,":[211,255],"propose":[213],"new":[215],"parameterisation":[216],"\"$\u03bc$PC\",":[218],"first":[222],"time":[223],"allows":[224],"stable":[225],"100+":[228],"layer":[229],"with":[231,273],"little":[232],"tuning":[233],"competitive":[235],"performance":[236],"simple":[238],"tasks.":[239],"Overall,":[240],"significantly":[243],"advances":[244],"our":[245],"fundamental":[246],"understanding":[247],"while":[256],"highlighting":[257],"need":[259],"future":[261],"research":[262],"focus":[264],"hardware":[266],"co-design":[267],"if":[268],"compete":[272],"at":[275],"scale.":[276]},"counts_by_year":[],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-29T00:00:00"}
