{"id":"https://openalex.org/W4399418769","doi":"https://doi.org/10.1145/3651671.3651740","title":"Better Generalization in Fast Training: Flat Trainable Weight in Subspace","display_name":"Better Generalization in Fast Training: Flat Trainable Weight in Subspace","publication_year":2024,"publication_date":"2024-02-02","ids":{"openalex":"https://openalex.org/W4399418769","doi":"https://doi.org/10.1145/3651671.3651740"},"language":"en","primary_location":{"id":"doi:10.1145/3651671.3651740","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3651671.3651740","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 16th International Conference on Machine Learning and Computing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5054795414","display_name":"Z Xiao Lei","orcid":"https://orcid.org/0009-0000-2658-3310"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zehao Lei","raw_affiliation_strings":["Department of Automation, Shanghai Jiao Tong University, China"],"raw_orcid":"https://orcid.org/0009-0000-2658-3310","affiliations":[{"raw_affiliation_string":"Department of Automation, Shanghai Jiao Tong University, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043103794","display_name":"Yingwen Wu","orcid":"https://orcid.org/0000-0002-9994-2414"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yingwen Wu","raw_affiliation_strings":["Department of Automation, Shanghai Jiao Tong University, China"],"raw_orcid":"https://orcid.org/0000-0002-9994-2414","affiliations":[{"raw_affiliation_string":"Department of Automation, Shanghai Jiao Tong University, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5082175942","display_name":"Tao Li","orcid":"https://orcid.org/0000-0002-8010-1447"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tao Li","raw_affiliation_strings":["Department of Automation, Shanghai Jiao Tong University, China"],"raw_orcid":"https://orcid.org/0000-0002-8010-1447","affiliations":[{"raw_affiliation_string":"Department of Automation, Shanghai Jiao Tong University, China","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5054795414"],"corresponding_institution_ids":["https://openalex.org/I183067930"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.06972656,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"470","last_page":"477"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.7531129121780396},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7071353793144226},{"id":"https://openalex.org/keywords/subspace-topology","display_name":"Subspace topology","score":0.643146276473999},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.5684852600097656},{"id":"https://openalex.org/keywords/schedule","display_name":"Schedule","score":0.5119041204452515},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5005452632904053},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.47794297337532043},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.4603666067123413},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.4552130401134491},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4519890248775482},{"id":"https://openalex.org/keywords/trajectory","display_name":"Trajectory","score":0.4122006297111511},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.4119776487350464},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.23169028759002686}],"concepts":[{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.7531129121780396},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7071353793144226},{"id":"https://openalex.org/C32834561","wikidata":"https://www.wikidata.org/wiki/Q660730","display_name":"Subspace topology","level":2,"score":0.643146276473999},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.5684852600097656},{"id":"https://openalex.org/C68387754","wikidata":"https://www.wikidata.org/wiki/Q7271585","display_name":"Schedule","level":2,"score":0.5119041204452515},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5005452632904053},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.47794297337532043},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.4603666067123413},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4552130401134491},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4519890248775482},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.4122006297111511},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.4119776487350464},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.23169028759002686},{"id":"https://openalex.org/C1276947","wikidata":"https://www.wikidata.org/wiki/Q333","display_name":"Astronomy","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3651671.3651740","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3651671.3651740","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 16th International Conference on Machine Learning and Computing","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":8,"referenced_works":["https://openalex.org/W2108598243","https://openalex.org/W2183341477","https://openalex.org/W2194775991","https://openalex.org/W2552194003","https://openalex.org/W3194779779","https://openalex.org/W3212348395","https://openalex.org/W4312709113","https://openalex.org/W6796913975"],"related_works":["https://openalex.org/W1980381208","https://openalex.org/W2364594919","https://openalex.org/W2167092671","https://openalex.org/W1861706286","https://openalex.org/W2219338811","https://openalex.org/W2149583853","https://openalex.org/W3162204513","https://openalex.org/W2143002539","https://openalex.org/W4293472652","https://openalex.org/W2130386332"],"abstract_inverted_index":{"Compressing":[0],"training":[1,36,43,48,79,93],"time":[2,164],"of":[3,17,66],"deep":[4],"neural":[5],"networks":[6],"(DNNs)":[7],"is":[8,51],"a":[9,63,109],"critical":[10],"task":[11],"due":[12],"to":[13,33,40,73],"the":[14,35,42,57,119,159],"huge":[15],"scale":[16],"data":[18],"and":[19,152,161],"models.":[20],"Different":[21],"from":[22],"most":[23],"previous":[24],"works":[25],"that":[26,54,117,138],"focus":[27],"on":[28,91,170],"using":[29,96,122],"large":[30],"batch":[31],"size":[32],"reduce":[34],"time,":[37],"we":[38,71,107],"consider":[39],"compress":[41],"epoch":[44],"through":[45],"our":[46],"designed":[47],"algorithm.":[49],"It":[50],"well":[52],"known":[53],"simply":[55],"reducing":[56],"learning":[58],"rate":[59],"schedule":[60],"results":[61],"in":[62,83,102,127,147],"significant":[64,145],"loss":[65,125],"generalization.":[67],"In":[68,154],"this":[69,139],"paper,":[70],"propose":[72],"maintain":[74],"test":[75],"accuracy":[76],"while":[77],"compressing":[78],"epochs":[80],"by":[81,87],"optimizing":[82],"extended":[84,128],"subspace":[85],"generated":[86],"historical":[88,97],"model":[89,148],"parameters":[90],"SGD":[92,169],"trajectory.":[94],"Although":[95],"information":[98],"has":[99],"been":[100],"studied":[101],"Trainable":[103,114,141],"Weight":[104],"Averaging":[105],"(TWA),":[106],"design":[108],"new":[110],"algorithm":[111],"called":[112],"Flat":[113,140],"Weights":[115,142],"(FTW)":[116,143],"optimizes":[118],"weight":[120],"coefficients":[121],"explicit":[123],"sharpness":[124],"function":[126],"low-dimensional":[129],"subspace,":[130],"which":[131],"achieves":[132,144],"better":[133],"generalization":[134,149],"performance.":[135],"We":[136],"show":[137],"improvement":[146],"over":[150,165,168],"TWA":[151],"SGD.":[153],"fast":[155],"training,":[156],"FTW":[157],"accelerates":[158],"convergence":[160],"saves":[162],"15%":[163],"TWA,":[166],"35%":[167],"CIFAR":[171],"datasets.":[172]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
