{"id":"https://openalex.org/W7117649867","doi":"https://doi.org/10.48550/arxiv.2512.23329","title":"Deep learning for pedestrians: backpropagation in Transformers","display_name":"Deep learning for pedestrians: backpropagation in Transformers","publication_year":2025,"publication_date":"2025-12-29","ids":{"openalex":"https://openalex.org/W7117649867","doi":"https://doi.org/10.48550/arxiv.2512.23329"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2512.23329","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2512.23329","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2512.23329","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5075355103","display_name":"Laurent Bou\u00e9","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Bou\u00e9, Laurent","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5075355103"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.4332999885082245,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.4332999885082245,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.1331000030040741,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.06759999692440033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/backpropagation","display_name":"Backpropagation","score":0.9129999876022339},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.6407999992370605},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5379999876022339},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.4853000044822693},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4745999872684479}],"concepts":[{"id":"https://openalex.org/C155032097","wikidata":"https://www.wikidata.org/wiki/Q798503","display_name":"Backpropagation","level":3,"score":0.9129999876022339},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7166000008583069},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6502000093460083},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.6407999992370605},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5379999876022339},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.4853000044822693},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4745999872684479},{"id":"https://openalex.org/C132010649","wikidata":"https://www.wikidata.org/wiki/Q189222","display_name":"Intuition","level":2,"score":0.45410001277923584},{"id":"https://openalex.org/C45357846","wikidata":"https://www.wikidata.org/wiki/Q2001982","display_name":"Notation","level":2,"score":0.44769999384880066},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3330000042915344},{"id":"https://openalex.org/C2779127903","wikidata":"https://www.wikidata.org/wiki/Q6510194","display_name":"Learning rule","level":3,"score":0.2628999948501587}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2512.23329","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2512.23329","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2512.23329","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2512.23329","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"This":[0],"document":[1],"is":[2,135],"a":[3,11,115,131],"follow-up":[4],"to":[5,10,45,68,101],"our":[6,41],"previous":[7],"paper":[8],"dedicated":[9],"vectorized":[12],"derivation":[13],"of":[14,48,91,130,143,145],"backpropagation":[15,76],"in":[16,26,89],"CNNs.":[17],"Following":[18],"the":[19,103,109,123],"same":[20],"principles":[21],"and":[22,55],"notations":[23],"already":[24],"put":[25],"place":[27],"there,":[28],"we":[29,39,60,113],"now":[30],"focus":[31],"on":[32],"transformer-based":[33],"next-token-prediction":[34],"architectures.":[35],"To":[36],"this":[37,85],"end,":[38],"apply":[40],"lightweight":[42],"index-free":[43],"methodology":[44],"new":[46],"types":[47],"layers":[49,67],"such":[50],"as":[51],"embedding,":[52],"multi-headed":[53],"self-attention":[54],"layer":[56],"normalization.":[57],"In":[58],"addition,":[59],"also":[61,136],"provide":[62],"gradient":[63,147],"expressions":[64,141],"for":[65,118,142],"LoRA":[66],"illustrate":[69],"parameter-efficient":[70],"fine-tuning.":[71],"Why":[72],"bother":[73],"doing":[74],"manual":[75],"when":[77,99],"there":[78],"are":[79],"so":[80],"many":[81],"tools":[82],"that":[83],"do":[84],"automatically?":[86],"Any":[87],"gap":[88],"understanding":[90],"how":[92,119],"values":[93],"propagate":[94],"forward":[95],"will":[96],"become":[97],"evident":[98],"attempting":[100],"differentiate":[102],"loss":[104],"function.":[105],"By":[106],"working":[107],"through":[108],"backward":[110],"pass":[111],"manually,":[112],"gain":[114],"deeper":[116],"intuition":[117],"each":[120],"operation":[121],"influences":[122],"final":[124],"output.":[125],"A":[126],"complete":[127],"PyTorch":[128],"implementation":[129],"minimalistic":[132],"GPT-like":[133],"network":[134],"provided":[137],"along":[138],"with":[139],"analytical":[140],"all":[144],"its":[146],"updates.":[148]},"counts_by_year":[],"updated_date":"2025-12-31T23:15:30.938425","created_date":"2025-12-31T00:00:00"}
