{"id":"https://openalex.org/W7161539466","doi":"https://doi.org/10.48550/arxiv.2605.15622","title":"Position: Zeroth-Order Optimization in Deep Learning Is Underexplored, Not Underpowered","display_name":"Position: Zeroth-Order Optimization in Deep Learning Is Underexplored, Not Underpowered","publication_year":2026,"publication_date":"2026-05-15","ids":{"openalex":"https://openalex.org/W7161539466","doi":"https://doi.org/10.48550/arxiv.2605.15622"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.15622","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.15622","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.15622","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5136405733","display_name":"Sijia Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Sijia","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111235146","display_name":"Yicheng Lang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lang, Yicheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043915060","display_name":"Soumyadeep Pal","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pal, Soumyadeep","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136440464","display_name":"Changsheng Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Changsheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136358390","display_name":"Yancheng Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Yancheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136342796","display_name":"Chongyu Fan","orcid":"https://orcid.org/0009-0008-4228-8942"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fan, Chongyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136399292","display_name":"James Diffenderfer","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Diffenderfer, James","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041470575","display_name":"Bhavya Kailkhura","orcid":"https://orcid.org/0000-0002-2819-2919"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kailkhura, Bhavya","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5136409614","display_name":"Yihua Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Yihua","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.2612999975681305,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.2612999975681305,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.07999999821186066,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12814","display_name":"Gaussian Processes and Bayesian Inference","score":0.06530000269412994,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/variance","display_name":"Variance (accounting)","score":0.6243000030517578},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6237000226974487},{"id":"https://openalex.org/keywords/estimator","display_name":"Estimator","score":0.5730000138282776},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.558899998664856},{"id":"https://openalex.org/keywords/path","display_name":"Path (computing)","score":0.4909999966621399},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.47440001368522644},{"id":"https://openalex.org/keywords/variance-reduction","display_name":"Variance reduction","score":0.42820000648498535}],"concepts":[{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6869999766349792},{"id":"https://openalex.org/C196083921","wikidata":"https://www.wikidata.org/wiki/Q7915758","display_name":"Variance (accounting)","level":2,"score":0.6243000030517578},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6237000226974487},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6202999949455261},{"id":"https://openalex.org/C185429906","wikidata":"https://www.wikidata.org/wiki/Q1130160","display_name":"Estimator","level":2,"score":0.5730000138282776},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.558899998664856},{"id":"https://openalex.org/C2777735758","wikidata":"https://www.wikidata.org/wiki/Q817765","display_name":"Path (computing)","level":2,"score":0.4909999966621399},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.47440001368522644},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.46869999170303345},{"id":"https://openalex.org/C62644790","wikidata":"https://www.wikidata.org/wiki/Q3454689","display_name":"Variance reduction","level":3,"score":0.42820000648498535},{"id":"https://openalex.org/C32834561","wikidata":"https://www.wikidata.org/wiki/Q660730","display_name":"Subspace topology","level":2,"score":0.4016000032424927},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.3968000113964081},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.2766000032424927},{"id":"https://openalex.org/C2778334786","wikidata":"https://www.wikidata.org/wiki/Q1586270","display_name":"Variation (astronomy)","level":2,"score":0.26489999890327454},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.2648000121116638},{"id":"https://openalex.org/C2982736386","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Statistical learning","level":2,"score":0.2619999945163727}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.15622","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.15622","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.15622","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.15622","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.407817006111145,"display_name":"Decent work and economic growth","id":"https://metadata.un.org/sdg/8"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Zeroth-order":[0],"(ZO)":[1],"optimization,":[2],"learning":[3,18,179],"from":[4,69,154],"finite":[5],"differences":[6],"of":[7,41,96,119,134],"function":[8],"evaluations":[9,153],"without":[10],"backpropagation,":[11],"has":[12],"recently":[13],"regained":[14],"attention":[15],"in":[16],"deep":[17],"due":[19],"to":[20,26,150],"its":[21,164],"memory":[22],"efficiency":[23],"and":[24,44,87,105,116,143,146,167,177],"applicability":[25],"gray-":[27],"or":[28],"black-box":[29],"pipelines.":[30],"Yet,":[31],"ZO":[32,56,98,120,135,152,161,181],"methods":[33,99],"are":[34],"often":[35],"dismissed":[36],"as":[37,136],"fundamentally":[38],"unscalable":[39],"because":[40],"estimator":[42],"variance":[43,101,124],"unfavorable":[45],"query":[46,128],"complexity.":[47,156],"We":[48,62,79,157],"argue":[49],"that":[50,64,121],"this":[51],"conclusion":[52],"might":[53],"be":[54],"misguided:":[55],"optimization":[57,162],"is":[58],"underexplored,":[59],"not":[60],"underpowered.":[61],"show":[63],"many":[65],"perceived":[66],"limitations":[67],"stem":[68],"myopic":[70],"development":[71],"practices,":[72],"most":[73],"notably":[74],"full-space,":[75],"element-wise,":[76],"estimator-centric":[77,97],"designs.":[78],"articulate":[80],"six":[81],"positions":[82],"spanning":[83],"the":[84,93,131,148],"algorithmic,":[85],"systems,":[86],"evaluation":[88],"stack.":[89],"First,":[90],"we":[91,109],"revisit":[92],"feasibility":[94],"boundaries":[95],"through":[100],"control,":[102],"variance-query":[103],"tradeoffs,":[104],"directional-derivative":[106],"lenses.":[107],"Then,":[108],"identify":[110],"three":[111],"underexplored":[112],"opportunities:":[113],"(i)":[114],"subspace":[115],"spectral":[117],"views":[118],"enable":[122],"interpretable":[123],"reduction":[125],"with":[126,180],"graceful":[127],"scaling,":[129],"(ii)":[130],"forward-only":[132],"nature":[133],"a":[137,171],"systems":[138],"advantage":[139],"for":[140],"communication-efficient,":[141],"pipeline-friendly,":[142],"resource-constrained":[144],"training,":[145],"(iii)":[147],"need":[149],"de-obfuscate":[151],"task":[155],"strongly":[158],"advocate":[159],"rethinking":[160],"around":[163],"unique":[165],"strengths":[166],"acting":[168],"accordingly,":[169],"opening":[170],"viable":[172],"path":[173],"toward":[174],"large-scale,":[175],"system-aware,":[176],"resource-efficient":[178],"optimization.":[182]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-19T00:00:00"}
