{"id":"https://openalex.org/W4296707792","doi":"https://doi.org/10.1109/tai.2022.3208223","title":"AdaInject: Injection-Based Adaptive Gradient Descent Optimizers for Convolutional Neural Networks","display_name":"AdaInject: Injection-Based Adaptive Gradient Descent Optimizers for Convolutional Neural Networks","publication_year":2022,"publication_date":"2022-09-21","ids":{"openalex":"https://openalex.org/W4296707792","doi":"https://doi.org/10.1109/tai.2022.3208223"},"language":"en","primary_location":{"id":"doi:10.1109/tai.2022.3208223","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tai.2022.3208223","pdf_url":null,"source":{"id":"https://openalex.org/S4210169448","display_name":"IEEE Transactions on Artificial Intelligence","issn_l":"2691-4581","issn":["2691-4581"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5048082929","display_name":"Shiv Ram Dubey","orcid":"https://orcid.org/0000-0002-4532-8996"},"institutions":[{"id":"https://openalex.org/I26072440","display_name":"Indian Institute of Information Technology Allahabad","ror":"https://ror.org/03rgjt374","country_code":"IN","type":"education","lineage":["https://openalex.org/I26072440"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Shiv Ram Dubey","raw_affiliation_strings":["Computer Vision and Biometrics Laboratory (CVBL), Indian Institute of Information Technology, Allahabad, Prayagraj, India"],"raw_orcid":"https://orcid.org/0000-0002-4532-8996","affiliations":[{"raw_affiliation_string":"Computer Vision and Biometrics Laboratory (CVBL), Indian Institute of Information Technology, Allahabad, Prayagraj, India","institution_ids":["https://openalex.org/I26072440"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090340316","display_name":"S. H. Shabbeer Basha","orcid":"https://orcid.org/0000-0002-8590-0897"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"S. H. Shabbeer Basha","raw_affiliation_strings":["PathPartner Technology Pvt. Ltd., Bangalore, India"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"PathPartner Technology Pvt. Ltd., Bangalore, India","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071107603","display_name":"Satish Kumar Singh","orcid":"https://orcid.org/0000-0002-8536-4991"},"institutions":[{"id":"https://openalex.org/I26072440","display_name":"Indian Institute of Information Technology Allahabad","ror":"https://ror.org/03rgjt374","country_code":"IN","type":"education","lineage":["https://openalex.org/I26072440"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Satish Kumar Singh","raw_affiliation_strings":["Computer Vision and Biometrics Laboratory (CVBL), Indian Institute of Information Technology, Allahabad, Prayagraj, India"],"raw_orcid":"https://orcid.org/0000-0002-8536-4991","affiliations":[{"raw_affiliation_string":"Computer Vision and Biometrics Laboratory (CVBL), Indian Institute of Information Technology, Allahabad, Prayagraj, India","institution_ids":["https://openalex.org/I26072440"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5025093894","display_name":"B.B. Chaudhuri","orcid":"https://orcid.org/0000-0003-0297-8929"},"institutions":[{"id":"https://openalex.org/I1288043984","display_name":"Techno India University","ror":"https://ror.org/00v1y6t69","country_code":"IN","type":"education","lineage":["https://openalex.org/I1288043984"]},{"id":"https://openalex.org/I6498739","display_name":"Indian Statistical Institute","ror":"https://ror.org/00q2w1j53","country_code":"IN","type":"education","lineage":["https://openalex.org/I6498739"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Bidyut Baran Chaudhuri","raw_affiliation_strings":["Indian Statistical Institute, Kolkata, India","Techno India University, Kolkata, India"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Indian Statistical Institute, Kolkata, India","institution_ids":["https://openalex.org/I6498739"]},{"raw_affiliation_string":"Techno India University, Kolkata, India","institution_ids":["https://openalex.org/I1288043984"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5048082929"],"corresponding_institution_ids":["https://openalex.org/I26072440"],"apc_list":null,"apc_paid":null,"fwci":1.8367,"has_fulltext":false,"cited_by_count":20,"citation_normalized_percentile":{"value":0.86716723,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"4","issue":"6","first_page":"1540","last_page":"1548"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12676","display_name":"Machine Learning and ELM","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.7267406582832336},{"id":"https://openalex.org/keywords/moment","display_name":"Moment (physics)","score":0.6884790658950806},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6865473985671997},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.6233305335044861},{"id":"https://openalex.org/keywords/gradient-descent","display_name":"Gradient descent","score":0.6198805570602417},{"id":"https://openalex.org/keywords/stochastic-gradient-descent","display_name":"Stochastic gradient descent","score":0.6045350432395935},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.5918904542922974},{"id":"https://openalex.org/keywords/rate-of-convergence","display_name":"Rate of convergence","score":0.453194260597229},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.4524674117565155},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.406879723072052},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3840421140193939},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.3689635396003723},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2304697334766388},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.21679118275642395}],"concepts":[{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.7267406582832336},{"id":"https://openalex.org/C179254644","wikidata":"https://www.wikidata.org/wiki/Q13222844","display_name":"Moment (physics)","level":2,"score":0.6884790658950806},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6865473985671997},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.6233305335044861},{"id":"https://openalex.org/C153258448","wikidata":"https://www.wikidata.org/wiki/Q1199743","display_name":"Gradient descent","level":3,"score":0.6198805570602417},{"id":"https://openalex.org/C206688291","wikidata":"https://www.wikidata.org/wiki/Q7617819","display_name":"Stochastic gradient descent","level":3,"score":0.6045350432395935},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.5918904542922974},{"id":"https://openalex.org/C57869625","wikidata":"https://www.wikidata.org/wiki/Q1783502","display_name":"Rate of convergence","level":3,"score":0.453194260597229},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.4524674117565155},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.406879723072052},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3840421140193939},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.3689635396003723},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2304697334766388},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.21679118275642395},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C74650414","wikidata":"https://www.wikidata.org/wiki/Q11397","display_name":"Classical mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tai.2022.3208223","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tai.2022.3208223","pdf_url":null,"source":{"id":"https://openalex.org/S4210169448","display_name":"IEEE Transactions on Artificial Intelligence","issn_l":"2691-4581","issn":["2691-4581"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":54,"referenced_works":["https://openalex.org/W6908809","https://openalex.org/W114517082","https://openalex.org/W1536680647","https://openalex.org/W1988720110","https://openalex.org/W1994616650","https://openalex.org/W2096733369","https://openalex.org/W2117539524","https://openalex.org/W2145287260","https://openalex.org/W2146502635","https://openalex.org/W2194775991","https://openalex.org/W2344138609","https://openalex.org/W2549139847","https://openalex.org/W2551176409","https://openalex.org/W2613332842","https://openalex.org/W2750384547","https://openalex.org/W2752782242","https://openalex.org/W2798391154","https://openalex.org/W2803681939","https://openalex.org/W2894176037","https://openalex.org/W2955747520","https://openalex.org/W2962793481","https://openalex.org/W2963446712","https://openalex.org/W2982064756","https://openalex.org/W2989673842","https://openalex.org/W3031420959","https://openalex.org/W3039655629","https://openalex.org/W3084521418","https://openalex.org/W3110386488","https://openalex.org/W3118608800","https://openalex.org/W3138049982","https://openalex.org/W3164284422","https://openalex.org/W3186548651","https://openalex.org/W3199102655","https://openalex.org/W3205812661","https://openalex.org/W4210800005","https://openalex.org/W6600284362","https://openalex.org/W6604254268","https://openalex.org/W6620707391","https://openalex.org/W6631190155","https://openalex.org/W6637373629","https://openalex.org/W6681435938","https://openalex.org/W6743688258","https://openalex.org/W6749646390","https://openalex.org/W6754654111","https://openalex.org/W6757107679","https://openalex.org/W6757751764","https://openalex.org/W6760385162","https://openalex.org/W6762287338","https://openalex.org/W6767164110","https://openalex.org/W6770035337","https://openalex.org/W6784291726","https://openalex.org/W6784591795","https://openalex.org/W6787972765","https://openalex.org/W6795676476"],"related_works":["https://openalex.org/W2895097035","https://openalex.org/W4206903459","https://openalex.org/W2754816816","https://openalex.org/W4366280654","https://openalex.org/W3160167280","https://openalex.org/W4362706668","https://openalex.org/W4231621013","https://openalex.org/W3171021120","https://openalex.org/W3008318776","https://openalex.org/W2041416246"],"abstract_inverted_index":{"The":[0,15,77,96,112],"convolutional":[1],"neural":[2],"networks":[3],"(CNNs)":[4],"are":[5,170],"generally":[6,19],"trained":[7],"using":[8,188],"stochastic":[9],"gradient":[10,45],"descent":[11,46],"(SGD)-based":[12],"optimization":[13],"techniques.":[14],"existing":[16,109,206],"SGD":[17,110],"optimizers":[18,47,207],"suffer":[20],"with":[21,107,153,191,208],"the":[22,25,44,50,54,58,70,74,81,85,88,92,115,131,135,142,145,154,173,179,195,209],"overshooting":[23,86],"of":[24,87,114,134,144,184,205],"minimum":[26],"and":[27,90,103,161],"oscillation":[28,93],"near":[29,94],"minimum.":[30,95],"In":[31],"this":[32],"article,":[33],"we":[34,140,199],"propose":[35],"a":[36,66],"new":[37],"approach,":[38],"hereafter":[39],"referred":[40],"as":[41,65,121,123],"AdaInject,":[42],"for":[43],"by":[48],"injecting":[49],"second-order":[51,71],"moment":[52,72],"into":[53],"first-order":[55],"moment.":[56],"Specifically,":[57],"short-term":[59],"change":[60],"in":[61,73,101,151,172,178],"parameter":[62,82],"is":[63,99,118,186],"used":[64,171],"weight":[67],"to":[68],"inject":[69],"update":[75],"rule.":[76],"AdaInject":[78,116,146,211],"optimizer":[79,117,190],"controls":[80],"update,":[83],"avoids":[84],"minimum,":[89],"reduces":[91],"proposed":[97,136,210],"approach":[98,147],"generic":[100],"nature":[102],"can":[104],"be":[105],"integrated":[106],"any":[108],"optimizer.":[111,138],"effectiveness":[113],"explained":[119],"intuitively":[120],"well":[122],"through":[124,148],"some":[125],"toy":[126],"examples.":[127],"We":[128],"also":[129],"show":[130],"convergence":[132],"property":[133],"injection-based":[137],"Furthermore,":[139],"depict":[141],"efficacy":[143],"extensive":[149],"experiments":[150],"conjunction":[152],"state-of-the-art":[155],"optimizers,":[156],"namely":[157],"AdamInject,":[158],"diffGradInject,":[159],"RadamInject,":[160],"AdaBeliefInject,":[162],"on":[163],"four":[164],"benchmark":[165],"datasets.":[166],"Different":[167],"CNN":[168],"models":[169],"experiments.":[174],"A":[175],"highest":[176],"improvement":[177,204],"top-1":[180],"classification":[181],"error":[182],"rate":[183],"16.54%":[185],"observed":[187],"diffGradInject":[189],"ResNeXt29":[192],"model":[193],"over":[194],"CIFAR10":[196],"dataset.":[197],"Overall,":[198],"observe":[200],"very":[201],"promising":[202],"performance":[203],"approach.":[212]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
