{"id":"https://openalex.org/W4414563651","doi":"https://doi.org/10.1109/tnnls.2025.3610665","title":"<i>Ape</i> Optimizer: A p-Power Adaptive Filter-Based Approach for Deep Learning Optimization","display_name":"<i>Ape</i> Optimizer: A p-Power Adaptive Filter-Based Approach for Deep Learning Optimization","publication_year":2025,"publication_date":"2025-09-25","ids":{"openalex":"https://openalex.org/W4414563651","doi":"https://doi.org/10.1109/tnnls.2025.3610665","pmid":"https://pubmed.ncbi.nlm.nih.gov/40996994"},"language":"en","primary_location":{"id":"doi:10.1109/tnnls.2025.3610665","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2025.3610665","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Yufei Jin","orcid":"https://orcid.org/0009-0002-6671-931X"},"institutions":[{"id":"https://openalex.org/I4210116924","display_name":"Chinese University of Hong Kong, Shenzhen","ror":"https://ror.org/02d5ks197","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633","https://openalex.org/I180726961","https://openalex.org/I4210116924"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yufei Jin","raw_affiliation_strings":["School of Science and Engineering, The Chinese University of Hong Kong, Shenzhen, Guangdong, China"],"affiliations":[{"raw_affiliation_string":"School of Science and Engineering, The Chinese University of Hong Kong, Shenzhen, Guangdong, China","institution_ids":["https://openalex.org/I4210116924"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110937107","display_name":"Han Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210116924","display_name":"Chinese University of Hong Kong, Shenzhen","ror":"https://ror.org/02d5ks197","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633","https://openalex.org/I180726961","https://openalex.org/I4210116924"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Han Yang","raw_affiliation_strings":["School of Science and Engineering, The Chinese University of Hong Kong, Shenzhen, Guangdong, China"],"affiliations":[{"raw_affiliation_string":"School of Science and Engineering, The Chinese University of Hong Kong, Shenzhen, Guangdong, China","institution_ids":["https://openalex.org/I4210116924"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100768848","display_name":"Xinrui Wang","orcid":"https://orcid.org/0000-0002-3085-5697"},"institutions":[{"id":"https://openalex.org/I4210116924","display_name":"Chinese University of Hong Kong, Shenzhen","ror":"https://ror.org/02d5ks197","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633","https://openalex.org/I180726961","https://openalex.org/I4210116924"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xinrui Wang","raw_affiliation_strings":["School of Science and Engineering, The Chinese University of Hong Kong, Shenzhen, Guangdong, China"],"affiliations":[{"raw_affiliation_string":"School of Science and Engineering, The Chinese University of Hong Kong, Shenzhen, Guangdong, China","institution_ids":["https://openalex.org/I4210116924"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101265758","display_name":"Yingche Xu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210116924","display_name":"Chinese University of Hong Kong, Shenzhen","ror":"https://ror.org/02d5ks197","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633","https://openalex.org/I180726961","https://openalex.org/I4210116924"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yingche Xu","raw_affiliation_strings":["School of Science and Engineering, The Chinese University of Hong Kong, Shenzhen, Guangdong, China"],"affiliations":[{"raw_affiliation_string":"School of Science and Engineering, The Chinese University of Hong Kong, Shenzhen, Guangdong, China","institution_ids":["https://openalex.org/I4210116924"]}]},{"author_position":"last","author":{"id":null,"display_name":"Zhuoran Zhang","orcid":"https://orcid.org/0000-0002-8737-4210"},"institutions":[{"id":"https://openalex.org/I4210116924","display_name":"Chinese University of Hong Kong, Shenzhen","ror":"https://ror.org/02d5ks197","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633","https://openalex.org/I180726961","https://openalex.org/I4210116924"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhuoran Zhang","raw_affiliation_strings":["School of Science and Engineering, The Chinese University of Hong Kong, Shenzhen, Guangdong, China"],"affiliations":[{"raw_affiliation_string":"School of Science and Engineering, The Chinese University of Hong Kong, Shenzhen, Guangdong, China","institution_ids":["https://openalex.org/I4210116924"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I4210116924"],"apc_list":null,"apc_paid":null,"fwci":4.7137,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.95163858,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":98},"biblio":{"volume":"37","issue":"2","first_page":"673","last_page":"685"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.6194999814033508,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.6194999814033508,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.6791999936103821},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.6347000002861023},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5755000114440918},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.526199996471405},{"id":"https://openalex.org/keywords/gaussian","display_name":"Gaussian","score":0.4885999858379364},{"id":"https://openalex.org/keywords/gaussian-noise","display_name":"Gaussian noise","score":0.4562000036239624},{"id":"https://openalex.org/keywords/gaussian-process","display_name":"Gaussian process","score":0.4327999949455261},{"id":"https://openalex.org/keywords/noise-measurement","display_name":"Noise measurement","score":0.34439998865127563}],"concepts":[{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.6791999936103821},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6753000020980835},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6686000227928162},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.6347000002861023},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5755000114440918},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.526199996471405},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5109000205993652},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.4885999858379364},{"id":"https://openalex.org/C4199805","wikidata":"https://www.wikidata.org/wiki/Q2725903","display_name":"Gaussian noise","level":2,"score":0.4562000036239624},{"id":"https://openalex.org/C61326573","wikidata":"https://www.wikidata.org/wiki/Q1496376","display_name":"Gaussian process","level":3,"score":0.4327999949455261},{"id":"https://openalex.org/C29265498","wikidata":"https://www.wikidata.org/wiki/Q7047719","display_name":"Noise measurement","level":3,"score":0.34439998865127563},{"id":"https://openalex.org/C2987595161","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Optimization algorithm","level":2,"score":0.3425000011920929},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.3407999873161316},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.33959999680519104},{"id":"https://openalex.org/C2781170535","wikidata":"https://www.wikidata.org/wiki/Q30587856","display_name":"Noisy data","level":2,"score":0.3224000036716461},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.2939000129699707},{"id":"https://openalex.org/C125014702","wikidata":"https://www.wikidata.org/wiki/Q4680749","display_name":"Adaptive learning","level":2,"score":0.28850001096725464},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.28540000319480896},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.2775000035762787},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.27160000801086426},{"id":"https://openalex.org/C2986577269","wikidata":"https://www.wikidata.org/wiki/Q11306265","display_name":"Random noise","level":2,"score":0.2678000032901764},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.2540999948978424},{"id":"https://openalex.org/C120936955","wikidata":"https://www.wikidata.org/wiki/Q2155640","display_name":"Empirical research","level":2,"score":0.25189998745918274},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.251800000667572}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tnnls.2025.3610665","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2025.3610665","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},{"id":"pmid:40996994","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/40996994","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on neural networks and learning systems","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G7364764902","display_name":null,"funder_award_id":"2023YFE0205500","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G7727633222","display_name":null,"funder_award_id":"62203374","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8680336817","display_name":null,"funder_award_id":"2024A1515010160","funder_id":"https://openalex.org/F4320320671","funder_display_name":"National Research Foundation"}],"funders":[{"id":"https://openalex.org/F4320320671","display_name":"National Research Foundation","ror":"https://ror.org/05s0g1g46"},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W1535810436","https://openalex.org/W1890984361","https://openalex.org/W1980287119","https://openalex.org/W1994616650","https://openalex.org/W1999542175","https://openalex.org/W2112796928","https://openalex.org/W2114998197","https://openalex.org/W2133247229","https://openalex.org/W2302255633","https://openalex.org/W2402268235","https://openalex.org/W2919115771","https://openalex.org/W2936774411","https://openalex.org/W2947542756","https://openalex.org/W2953328958","https://openalex.org/W2964137095","https://openalex.org/W3003534552","https://openalex.org/W3084521418","https://openalex.org/W3096831136","https://openalex.org/W3193987867","https://openalex.org/W4205637966","https://openalex.org/W4224281861","https://openalex.org/W4307341156","https://openalex.org/W4360898066","https://openalex.org/W4392693667","https://openalex.org/W4394804915","https://openalex.org/W4395056517","https://openalex.org/W4398226200","https://openalex.org/W4402809720"],"related_works":[],"abstract_inverted_index":{"Deep":[0],"learning":[1,174],"has":[2],"been":[3],"widely":[4],"applied":[5],"in":[6,151,171,184],"various":[7],"domains.":[8],"Current":[9],"widely-used":[10],"optimizers,":[11],"such":[12],"as":[13],"SGD,":[14],"Adam,":[15],"and":[16,72,117,155,177],"their":[17],"variants,":[18],"are":[19],"designed":[20,76],"based":[21],"on":[22,69,145],"the":[23,26,44,55,70,78,85,92,122,166,179],"assumption":[24,79],"that":[25,43],"gradient":[27,45,126],"noise":[28,46,56],"generated":[29],"during":[30],"model":[31],"training":[32,156],"follows":[33],"a":[34,51,99,109],"Gaussian":[35,52,81],"distribution.":[36,53],"However,":[37],"recent":[38],"empirical":[39],"studies":[40],"have":[41],"found":[42],"often":[47],"does":[48],"not":[49],"follow":[50],"Instead,":[54],"exhibits":[57],"heavy-tailed":[58,125],"characteristics":[59],"consistent":[60],"with":[61],"an":[62,131],"$\\alpha":[63,139],"$":[64,140],"-stable":[65,141],"distribution,":[66],"casting":[67],"doubt":[68],"performance":[71],"robustness":[73],"of":[74,80,94,124,168],"optimizers":[75],"under":[77],"noise.":[82],"Inspired":[83],"by":[84],"least":[86],"mean":[87],"p-power":[88,110],"(LMP)":[89],"algorithm":[90],"from":[91],"field":[93],"adaptive":[95],"filtering,":[96],"we":[97],"propose":[98],"novel":[100],"optimizer":[101,164],"called":[102],"Ape":[103,107,163],"for":[104,133,181],"deep":[105,173],"learning.":[106],"integrates":[108],"adjustment":[111],"mechanism":[112],"to":[113,138,159],"compress":[114],"large":[115],"gradients":[116],"amplify":[118],"small":[119],"ones,":[120],"mitigating":[121],"impact":[123],"distributions.":[127,142],"It":[128],"also":[129],"employs":[130],"approach":[132],"estimating":[134],"second":[135],"moments":[136],"tailored":[137],"Extensive":[143],"experiments":[144],"benchmark":[146],"datasets":[147],"demonstrate":[148],"Ape's":[149],"effectiveness":[150],"improving":[152],"both":[153],"accuracy":[154],"speed":[157],"compared":[158],"existing":[160],"optimizers.":[161],"The":[162],"showcases":[165],"potential":[167],"cross-disciplinary":[169],"approaches":[170],"advancing":[172],"optimization":[175],"techniques":[176],"lays":[178],"groundwork":[180],"future":[182],"innovations":[183],"this":[185],"domain.":[186]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
