{"id":"https://openalex.org/W4406458196","doi":"https://doi.org/10.1109/bigdata62323.2024.10825379","title":"Fast Second-order Method for Neural Networks under Small Treewidth Setting","display_name":"Fast Second-order Method for Neural Networks under Small Treewidth Setting","publication_year":2024,"publication_date":"2024-12-15","ids":{"openalex":"https://openalex.org/W4406458196","doi":"https://doi.org/10.1109/bigdata62323.2024.10825379"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata62323.2024.10825379","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata62323.2024.10825379","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Big Data (BigData)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5083084103","display_name":"Xiaoyu Li","orcid":"https://orcid.org/0009-0007-3006-3060"},"institutions":[{"id":"https://openalex.org/I108468826","display_name":"Stevens Institute of Technology","ror":"https://ror.org/02z43xh36","country_code":"US","type":"education","lineage":["https://openalex.org/I108468826"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Xiaoyu Li","raw_affiliation_strings":["Stevens Institute of Technology,Department of Computer Science,Hoboken,NJ"],"affiliations":[{"raw_affiliation_string":"Stevens Institute of Technology,Department of Computer Science,Hoboken,NJ","institution_ids":["https://openalex.org/I108468826"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030617408","display_name":"Jun Long","orcid":"https://orcid.org/0009-0008-6097-0646"},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiangxuan Long","raw_affiliation_strings":["South China University of Technology,School of Software Engineering,Guangzhou,China"],"affiliations":[{"raw_affiliation_string":"South China University of Technology,School of Software Engineering,Guangzhou,China","institution_ids":["https://openalex.org/I90610280"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103280080","display_name":"Zhao Song","orcid":"https://orcid.org/0000-0003-4589-5234"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]},{"id":"https://openalex.org/I4210107338","display_name":"Simons Foundation","ror":"https://ror.org/01cmst727","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I4210107338"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhao Song","raw_affiliation_strings":["University of California, Berkeley,The Simons Institute for the Theory of Computing,Berkeley,CA"],"affiliations":[{"raw_affiliation_string":"University of California, Berkeley,The Simons Institute for the Theory of Computing,Berkeley,CA","institution_ids":["https://openalex.org/I4210107338","https://openalex.org/I95457486"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5039076312","display_name":"Tianyi Zhou","orcid":"https://orcid.org/0000-0001-5348-0632"},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tianyi Zhou","raw_affiliation_strings":["University of Southern California,Department of Computer Science,Los Angeles,CA"],"affiliations":[{"raw_affiliation_string":"University of Southern California,Department of Computer Science,Los Angeles,CA","institution_ids":["https://openalex.org/I1174212"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5083084103"],"corresponding_institution_ids":["https://openalex.org/I108468826"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.23047019,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1029","last_page":"1038"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12676","display_name":"Machine Learning and ELM","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/treewidth","display_name":"Treewidth","score":0.6865098476409912},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6178711652755737},{"id":"https://openalex.org/keywords/order","display_name":"Order (exchange)","score":0.5469085574150085},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.49903416633605957},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.33244097232818604},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.2906641960144043},{"id":"https://openalex.org/keywords/pathwidth","display_name":"Pathwidth","score":0.11889967322349548},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.10890114307403564}],"concepts":[{"id":"https://openalex.org/C132569581","wikidata":"https://www.wikidata.org/wiki/Q5067368","display_name":"Treewidth","level":5,"score":0.6865098476409912},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6178711652755737},{"id":"https://openalex.org/C182306322","wikidata":"https://www.wikidata.org/wiki/Q1779371","display_name":"Order (exchange)","level":2,"score":0.5469085574150085},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.49903416633605957},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.33244097232818604},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2906641960144043},{"id":"https://openalex.org/C43517604","wikidata":"https://www.wikidata.org/wiki/Q7144893","display_name":"Pathwidth","level":4,"score":0.11889967322349548},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.10890114307403564},{"id":"https://openalex.org/C203776342","wikidata":"https://www.wikidata.org/wiki/Q1378376","display_name":"Line graph","level":3,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C10138342","wikidata":"https://www.wikidata.org/wiki/Q43015","display_name":"Finance","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata62323.2024.10825379","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata62323.2024.10825379","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Big Data (BigData)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":111,"referenced_works":["https://openalex.org/W1520511539","https://openalex.org/W1522301498","https://openalex.org/W1583837637","https://openalex.org/W1970789124","https://openalex.org/W2027562075","https://openalex.org/W2146502635","https://openalex.org/W2566079294","https://openalex.org/W2809090039","https://openalex.org/W2885880010","https://openalex.org/W2898324627","https://openalex.org/W2912322140","https://openalex.org/W2917744435","https://openalex.org/W2945554113","https://openalex.org/W2946985165","https://openalex.org/W2947461788","https://openalex.org/W2948046738","https://openalex.org/W2949804919","https://openalex.org/W2963060476","https://openalex.org/W3021189130","https://openalex.org/W3036165826","https://openalex.org/W3098047114","https://openalex.org/W3101014297","https://openalex.org/W3112410551","https://openalex.org/W3213641637","https://openalex.org/W4206742934","https://openalex.org/W4244364296","https://openalex.org/W4289293816","https://openalex.org/W4289436733","https://openalex.org/W4298403519","https://openalex.org/W4309044967","https://openalex.org/W4378765283","https://openalex.org/W4386794659","https://openalex.org/W4386807907","https://openalex.org/W4387075214","https://openalex.org/W4389115958","https://openalex.org/W4394591690","https://openalex.org/W4396787095","https://openalex.org/W4399115608","https://openalex.org/W4399115613","https://openalex.org/W4399317489","https://openalex.org/W4399401485","https://openalex.org/W4399447872","https://openalex.org/W4402426819","https://openalex.org/W4402699832","https://openalex.org/W4402701877","https://openalex.org/W4402955091","https://openalex.org/W4403564645","https://openalex.org/W4403564681","https://openalex.org/W4403573649","https://openalex.org/W4403573651","https://openalex.org/W4403573665","https://openalex.org/W4403580695","https://openalex.org/W4403582456","https://openalex.org/W4405622284","https://openalex.org/W4406059550","https://openalex.org/W4406072145","https://openalex.org/W6608133726","https://openalex.org/W6631190155","https://openalex.org/W6638214083","https://openalex.org/W6674634876","https://openalex.org/W6681435938","https://openalex.org/W6683107984","https://openalex.org/W6693969136","https://openalex.org/W6743885270","https://openalex.org/W6752495264","https://openalex.org/W6753918066","https://openalex.org/W6755150206","https://openalex.org/W6755938841","https://openalex.org/W6756091659","https://openalex.org/W6756455746","https://openalex.org/W6759955879","https://openalex.org/W6762441359","https://openalex.org/W6763047453","https://openalex.org/W6763152702","https://openalex.org/W6763304541","https://openalex.org/W6782948483","https://openalex.org/W6796340207","https://openalex.org/W6796610148","https://openalex.org/W6802419229","https://openalex.org/W6841372840","https://openalex.org/W6846303937","https://openalex.org/W6846342317","https://openalex.org/W6846737073","https://openalex.org/W6849168699","https://openalex.org/W6850406903","https://openalex.org/W6852164419","https://openalex.org/W6852965349","https://openalex.org/W6856291605","https://openalex.org/W6856432607","https://openalex.org/W6856845988","https://openalex.org/W6859157733","https://openalex.org/W6862735123","https://openalex.org/W6864053229","https://openalex.org/W6864125768","https://openalex.org/W6868631447","https://openalex.org/W6868832719","https://openalex.org/W6869019578","https://openalex.org/W6870143325","https://openalex.org/W6871571102","https://openalex.org/W6872490206","https://openalex.org/W6872829819","https://openalex.org/W6872943007","https://openalex.org/W6873002424","https://openalex.org/W6873160360","https://openalex.org/W6873336637","https://openalex.org/W6873681233","https://openalex.org/W6873776458","https://openalex.org/W6874834294","https://openalex.org/W6875416043","https://openalex.org/W6876316728","https://openalex.org/W6948022635"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W4299559467","https://openalex.org/W2949617446","https://openalex.org/W4294299870","https://openalex.org/W2748664888","https://openalex.org/W1582492864","https://openalex.org/W4295887958","https://openalex.org/W2745231626"],"abstract_inverted_index":{"Training":[0],"neural":[1,43,87,173],"networks":[2],"is":[3,70,82,127,163],"a":[4,141,151,168,177],"fundamental":[5],"problem":[6],"in":[7,16,47],"theoretical":[8],"machine":[9],"learning.":[10],"Second-order":[11],"methods":[12],"are":[13],"rarely":[14],"used":[15],"practice":[17],"due":[18],"to":[19,39,112,166],"their":[20],"high":[21],"computational":[22],"cost,":[23],"even":[24],"they":[25],"converge":[26],"much":[27],"faster":[28],"than":[29],"first-order":[30],"methods.":[31],"The":[32],"state-of-the-art":[33],"result":[34],"for":[35,109,172],"the":[36,64,71,74,83,86,102,106,128,131,156,164],"second-order":[37,153],"method":[38],"train":[40],"an":[41],"over-parameterized":[42],"network":[44,88,174],"can":[45,147],"run":[46],"O(log(1/\u03f5))":[48],"iterations":[49],"and":[50,80,92,105,114,125,134],"each":[51,78,110],"iteration":[52,111],"has":[53,140],"O(mnd":[54],"+":[55],"n<sup":[56],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[57,116,123,183],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">3</sup>)":[58],"running":[59,107,179],"time,":[60],"where":[61,119],"n":[62],"represent":[63],"number":[65],"of":[66,73,77,85,130,158,181],"data":[67,79,132],"points,":[68],"d":[69],"dimension":[72],"feature":[75],"space":[76],"m":[81,120],"width":[84],"[Brand,":[89],"Peng,":[90],"Song":[91],"Weinstein":[93],"ITCS":[94],"2021].":[95],"In":[96],"this":[97],"work,":[98],"we":[99],"further":[100],"improve":[101],"convergence":[103,143,170],"rate":[104,171],"time":[108,180],"O(loglog(1/\u03f5))":[113],"O(md\u03c4<sup":[115,182],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">2</sup>)":[117],"respectively,":[118],"=":[121],"\u03a9(n<sup":[122],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">4</sup>)":[124],"\u03c4":[126],"treewidth":[129],"matrix":[133],"usually":[135],"very":[136],"small.":[137],"Our":[138],"algorithm":[139,162],"quadratic":[142,169],"rate,":[144],"so":[145],"it":[146],"be":[148],"regarded":[149],"as":[150],"truly":[152],"algorithm.":[154],"To":[155],"best":[157],"our":[159,161],"knowledge,":[160],"first":[165],"achieve":[167],"training,":[175],"with":[176],"per-iteration":[178],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">2</sup>).":[184]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
