{"id":"https://openalex.org/W4414360573","doi":"https://doi.org/10.24963/ijcai.2025/348","title":"Sharpness-aware Zeroth-order Optimization for Graph Transformers","display_name":"Sharpness-aware Zeroth-order Optimization for Graph Transformers","publication_year":2025,"publication_date":"2025-09-01","ids":{"openalex":"https://openalex.org/W4414360573","doi":"https://doi.org/10.24963/ijcai.2025/348"},"language":"en","primary_location":{"id":"doi:10.24963/ijcai.2025/348","is_oa":false,"landing_page_url":"https://doi.org/10.24963/ijcai.2025/348","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-Fourth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5052906249","display_name":"Yang Liu","orcid":"https://orcid.org/0000-0003-3791-4343"},"institutions":[{"id":"https://openalex.org/I4210120485","display_name":"Academy of Mathematics and Systems Science","ror":"https://ror.org/02jkmyk67","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210120485"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yang Liu","raw_affiliation_strings":["Academy of Mathematics and Systems Science"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Academy of Mathematics and Systems Science","institution_ids":["https://openalex.org/I4210120485"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085319576","display_name":"Chuan Zhou","orcid":"https://orcid.org/0000-0001-9958-8673"},"institutions":[{"id":"https://openalex.org/I4210120485","display_name":"Academy of Mathematics and Systems Science","ror":"https://ror.org/02jkmyk67","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210120485"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chuan Zhou","raw_affiliation_strings":["Academy of Mathematics and Systems Science","University of Chinese Academy of Science"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Academy of Mathematics and Systems Science","institution_ids":["https://openalex.org/I4210120485"]},{"raw_affiliation_string":"University of Chinese Academy of Science","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082347745","display_name":"Yuhan Lin","orcid":"https://orcid.org/0000-0001-6309-1796"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuhan Lin","raw_affiliation_strings":["Fudan University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Fudan University","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100328947","display_name":"Shuai Zhang","orcid":"https://orcid.org/0000-0003-4291-8170"},"institutions":[{"id":"https://openalex.org/I4210120485","display_name":"Academy of Mathematics and Systems Science","ror":"https://ror.org/02jkmyk67","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210120485"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shuai Zhang","raw_affiliation_strings":["Academy of Mathematics and Systems Science"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Academy of Mathematics and Systems Science","institution_ids":["https://openalex.org/I4210120485"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051092826","display_name":"Yang Gao","orcid":"https://orcid.org/0000-0001-8915-5065"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yang Gao","raw_affiliation_strings":["Zhejiang University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Zhejiang University","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100716350","display_name":"Zhao Li","orcid":"https://orcid.org/0000-0002-7461-9944"},"institutions":[{"id":"https://openalex.org/I4210112565","display_name":"Cube Technology (United States)","ror":"https://ror.org/01t4yq738","country_code":"US","type":"company","lineage":["https://openalex.org/I4210112565"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhao Li","raw_affiliation_strings":["Hangzhou Yugu Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Hangzhou Yugu Technology","institution_ids":["https://openalex.org/I4210112565"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5008056593","display_name":"Shirui Pan","orcid":"https://orcid.org/0000-0003-0794-527X"},"institutions":[{"id":"https://openalex.org/I11701301","display_name":"Griffith University","ror":"https://ror.org/02sc3r913","country_code":"AU","type":"education","lineage":["https://openalex.org/I11701301"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Shirui Pan","raw_affiliation_strings":["Griffith University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Griffith University","institution_ids":["https://openalex.org/I11701301"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5052906249"],"corresponding_institution_ids":["https://openalex.org/I4210120485"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.16625473,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"3126","last_page":"3134"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.9524999856948853,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.9524999856948853,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13182","display_name":"Quantum-Dot Cellular Automata","score":0.9262999892234802,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10682","display_name":"Quantum Computing Algorithms and Architecture","score":0.9233999848365784,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/minification","display_name":"Minification","score":0.6223999857902527},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.5382000207901001},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5184999704360962},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5015000104904175},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.392300009727478},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.36090001463890076}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6690999865531921},{"id":"https://openalex.org/C147764199","wikidata":"https://www.wikidata.org/wiki/Q6865248","display_name":"Minification","level":2,"score":0.6223999857902527},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.5382000207901001},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5184999704360962},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5015000104904175},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.4456000030040741},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.392300009727478},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.36090001463890076},{"id":"https://openalex.org/C164752517","wikidata":"https://www.wikidata.org/wiki/Q5570875","display_name":"Global optimization","level":2,"score":0.35339999198913574},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.3531999886035919},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.34689998626708984},{"id":"https://openalex.org/C2987595161","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Optimization algorithm","level":2,"score":0.3452000021934509},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.33970001339912415},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3102000057697296},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.25200000405311584},{"id":"https://openalex.org/C3017489831","wikidata":"https://www.wikidata.org/wiki/Q2393193","display_name":"Running time","level":2,"score":0.2508000135421753}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.24963/ijcai.2025/348","is_oa":false,"landing_page_url":"https://doi.org/10.24963/ijcai.2025/348","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-Fourth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Graph":[0],"Transformers":[1],"(GTs)":[2],"have":[3],"emerged":[4],"as":[5,44],"powerful":[6],"tools":[7],"for":[8,112],"handling":[9],"graph-structured":[10],"data":[11],"through":[12],"global":[13],"attention":[14],"mechanisms.":[15],"While":[16],"GTs":[17,139],"can":[18],"effectively":[19],"capture":[20],"long-range":[21],"dependencies,":[22],"they":[23],"introduce":[24],"difficulties":[25],"in":[26],"optimization":[27],"due":[28,71],"to":[29,72],"their":[30],"complex,":[31],"non-differentiable":[32],"operators,":[33],"which":[34,147],"cannot":[35],"be":[36],"directly":[37],"handled":[38],"by":[39],"standard":[40],"gradient-based":[41],"optimizers":[42],"(such":[43],"Adam":[45],"or":[46],"AdamW).":[47],"To":[48],"investigate":[49],"the":[50,56,73,81,86,123,149,155],"above":[51,87],"issues,":[52],"this":[53],"work":[54],"adopts":[55],"line":[57],"of":[58,66,122,144,152],"Zeroth-Order":[59],"Optimization":[60],"(ZOO)":[61],"technique.":[62],"However,":[63],"direct":[64],"integration":[65],"ZOO":[67],"incurs":[68],"considerable":[69],"challenges":[70],"sharp":[74],"loss":[75],"landscape":[76],"and":[77,108,128],"steep":[78],"gradients":[79],"within":[80,104],"GT":[82],"parameter":[83],"space.":[84],"Under":[85],"observations,":[88],"we":[89,117,132],"propose":[90],"a":[91,105,119,141],"Sharpness-aware":[92],"Zeroth-order":[93],"Optimizer":[94],"(SZO)":[95],"that":[96],"combines":[97],"Sharpness-Aware":[98],"Minimization":[99],"(SAM)":[100],"technique":[101],"facilitating":[102],"convergence":[103,127],"flatter":[106],"neighborhood,":[107],"leverages":[109],"parallel":[110],"computing":[111],"efficient":[113],"gradient":[114],"estimation.":[115],"Theoretically,":[116],"provide":[118],"comprehensive":[120],"analysis":[121],"optimizer":[124],"from":[125],"both":[126],"generalization":[129],"perspectives.":[130],"Empirically,":[131],"conduct":[133],"extensive":[134],"experiments":[135],"on":[136],"various":[137],"classical":[138],"across":[140],"wide":[142],"range":[143],"benchmark":[145],"datasets,":[146],"underscore":[148],"superior":[150],"performance":[151],"SZO":[153],"over":[154],"state-of-the-art":[156],"optimizers.":[157]},"counts_by_year":[],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
