{"id":"https://openalex.org/W4415524215","doi":"https://doi.org/10.1109/mlsp62443.2025.11204263","title":"Fast and Robust Training of Deep Learning Models with Multiplicative Adagrad","display_name":"Fast and Robust Training of Deep Learning Models with Multiplicative Adagrad","publication_year":2025,"publication_date":"2025-08-31","ids":{"openalex":"https://openalex.org/W4415524215","doi":"https://doi.org/10.1109/mlsp62443.2025.11204263"},"language":null,"primary_location":{"id":"doi:10.1109/mlsp62443.2025.11204263","is_oa":false,"landing_page_url":"https://doi.org/10.1109/mlsp62443.2025.11204263","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE 35th International Workshop on Machine Learning for Signal Processing (MLSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5039953793","display_name":"Manos Kirtas","orcid":"https://orcid.org/0000-0002-8670-0248"},"institutions":[{"id":"https://openalex.org/I21370196","display_name":"Aristotle University of Thessaloniki","ror":"https://ror.org/02j61yw88","country_code":"GR","type":"education","lineage":["https://openalex.org/I21370196"]}],"countries":["GR"],"is_corresponding":true,"raw_author_name":"Manos Kirtas","raw_affiliation_strings":["Aristotle University of Thessaloniki,Computational Intelligence and Deep Learning Group,Dept. of Informatics,Thessaloniki,Greece"],"affiliations":[{"raw_affiliation_string":"Aristotle University of Thessaloniki,Computational Intelligence and Deep Learning Group,Dept. of Informatics,Thessaloniki,Greece","institution_ids":["https://openalex.org/I21370196"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061050264","display_name":"Nikolaos Passalis","orcid":"https://orcid.org/0000-0003-1177-9139"},"institutions":[{"id":"https://openalex.org/I21370196","display_name":"Aristotle University of Thessaloniki","ror":"https://ror.org/02j61yw88","country_code":"GR","type":"education","lineage":["https://openalex.org/I21370196"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"Nikolaos Passalis","raw_affiliation_strings":["Aristotle University of Thessaloniki,Computational Intelligence and Deep Learning Group,Dept. of Chemical Engineering,Thessaloniki,Greece"],"affiliations":[{"raw_affiliation_string":"Aristotle University of Thessaloniki,Computational Intelligence and Deep Learning Group,Dept. of Chemical Engineering,Thessaloniki,Greece","institution_ids":["https://openalex.org/I21370196"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5041054091","display_name":"Anastasios Tefas","orcid":"https://orcid.org/0000-0003-1288-3667"},"institutions":[{"id":"https://openalex.org/I21370196","display_name":"Aristotle University of Thessaloniki","ror":"https://ror.org/02j61yw88","country_code":"GR","type":"education","lineage":["https://openalex.org/I21370196"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"Anastasios Tefas","raw_affiliation_strings":["Aristotle University of Thessaloniki,Computational Intelligence and Deep Learning Group,Dept. of Informatics,Thessaloniki,Greece"],"affiliations":[{"raw_affiliation_string":"Aristotle University of Thessaloniki,Computational Intelligence and Deep Learning Group,Dept. of Informatics,Thessaloniki,Greece","institution_ids":["https://openalex.org/I21370196"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5039953793"],"corresponding_institution_ids":["https://openalex.org/I21370196"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.16996137,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12676","display_name":"Machine Learning and ELM","score":0.7394999861717224,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12676","display_name":"Machine Learning and ELM","score":0.7394999861717224,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.7006000280380249,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.617900013923645},{"id":"https://openalex.org/keywords/multiplicative-function","display_name":"Multiplicative function","score":0.5888000130653381},{"id":"https://openalex.org/keywords/normalization","display_name":"Normalization (sociology)","score":0.5767999887466431},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.45890000462532043},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.4510999917984009},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4503999948501587},{"id":"https://openalex.org/keywords/maxima-and-minima","display_name":"Maxima and minima","score":0.36660000681877136},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.329800009727478}],"concepts":[{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6614000201225281},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6381999850273132},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.617900013923645},{"id":"https://openalex.org/C42747912","wikidata":"https://www.wikidata.org/wiki/Q1048447","display_name":"Multiplicative function","level":2,"score":0.5888000130653381},{"id":"https://openalex.org/C136886441","wikidata":"https://www.wikidata.org/wiki/Q926129","display_name":"Normalization (sociology)","level":2,"score":0.5767999887466431},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5077999830245972},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.45890000462532043},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.4510999917984009},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4503999948501587},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.38280001282691956},{"id":"https://openalex.org/C186633575","wikidata":"https://www.wikidata.org/wiki/Q845060","display_name":"Maxima and minima","level":2,"score":0.36660000681877136},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3628000020980835},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.329800009727478},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.32519999146461487},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.2955000102519989},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.2953000068664551},{"id":"https://openalex.org/C117896860","wikidata":"https://www.wikidata.org/wiki/Q11376","display_name":"Acceleration","level":2,"score":0.29249998927116394},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.28940001130104065},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2847000062465668},{"id":"https://openalex.org/C196083921","wikidata":"https://www.wikidata.org/wiki/Q7915758","display_name":"Variance (accounting)","level":2,"score":0.2766000032424927},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.27140000462532043},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.2705000042915344},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.26919999718666077},{"id":"https://openalex.org/C75294576","wikidata":"https://www.wikidata.org/wiki/Q5165192","display_name":"Contextual image classification","level":3,"score":0.2685999870300293},{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.25839999318122864},{"id":"https://openalex.org/C5465570","wikidata":"https://www.wikidata.org/wiki/Q5326898","display_name":"Early stopping","level":3,"score":0.2538999915122986}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/mlsp62443.2025.11204263","is_oa":false,"landing_page_url":"https://doi.org/10.1109/mlsp62443.2025.11204263","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE 35th International Workshop on Machine Learning for Signal Processing (MLSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":6,"referenced_works":["https://openalex.org/W1677182931","https://openalex.org/W2129113961","https://openalex.org/W2150865801","https://openalex.org/W2189422931","https://openalex.org/W4391312451","https://openalex.org/W4405944334"],"related_works":[],"abstract_inverted_index":{"Even":[0],"though":[1],"Deep":[2],"Learning":[3,98],"architectures":[4],"achieve":[5],"state-of-the-art":[6],"performance":[7],"in":[8,92,103,148],"various":[9],"tasks,":[10],"the":[11,25,38,69,78,85,89,93,121,130,141,154,165],"training":[12],"process":[13],"remains":[14],"highly":[15],"sensitive":[16],"to":[17,41,68,113,126,133,137],"hyperparameters,":[18],"initial":[19,79,142],"weights,":[20],"and":[21,29,50,74,99,128,167],"data":[22],"distributions,":[23],"making":[24],"development":[26,95],"of":[27,55,72,81,88,96,144],"fast":[28],"stable":[30],"optimization":[31],"methods":[32],"a":[33,52,108],"challenging":[34],"task.":[35],"Adagrad":[36,162],"was":[37],"first":[39],"optimizer":[40],"significantly":[42,115],"outperform":[43],"SGD,":[44],"applying":[45],"an":[46],"adaptive":[47],"learning":[48,75],"approach":[49],"inspiring":[51],"wide":[53],"range":[54],"modern":[56],"optimizers.":[57],"However,":[58],"even":[59],"nowadays":[60],"many":[61],"optimizers":[62],"suffer":[63],"from":[64],"convergence":[65],"issues":[66],"related":[67],"high":[70],"variance":[71],"gradients":[73,127],"rates":[76],"at":[77,140],"stage":[80,143],"training.":[82],"Motivated":[83],"by":[84],"significant":[86,138],"contribution":[87],"multiplicative":[90,109],"updates":[91],"early":[94],"Machine":[97],"recent":[100],"preliminary":[101],"results,":[102],"this":[104],"work,":[105],"we":[106,158],"propose":[107],"update":[110,156],"term":[111,123],"oriented":[112],"Adagrad,":[114],"improving":[116],"its":[117],"performance.":[118],"More":[119],"specifically,":[120],"proposed":[122,155],"employs":[124],"normalization":[125],"scales":[129],"parameters":[131],"according":[132],"their":[134],"magnitudes,":[135],"leading":[136],"acceleration":[139,166],"training,":[145],"while":[146],"resulting":[147],"more":[149],"robust":[150,168],"models.":[151],"Based":[152],"on":[153,170],"term,":[157],"formulate":[159],"two":[160],"novel":[161],"alternatives":[163],"demonstrating":[164],"capabilities":[169],"image":[171],"classification":[172],"benchmarks.":[173]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-24T00:00:00"}
