{"id":"https://openalex.org/W4401404075","doi":"https://doi.org/10.1007/s40747-024-01595-w","title":"A noveliterationscheme withconjugategradient forfaster pruningon transformermodels","display_name":"A noveliterationscheme withconjugategradient forfaster pruningon transformermodels","publication_year":2024,"publication_date":"2024-08-07","ids":{"openalex":"https://openalex.org/W4401404075","doi":"https://doi.org/10.1007/s40747-024-01595-w"},"language":"en","primary_location":{"id":"doi:10.1007/s40747-024-01595-w","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s40747-024-01595-w","pdf_url":"https://link.springer.com/content/pdf/10.1007/s40747-024-01595-w.pdf","source":{"id":"https://openalex.org/S3035462843","display_name":"Complex & Intelligent Systems","issn_l":"2198-6053","issn":["2198-6053","2199-4536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Complex &amp; Intelligent Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://link.springer.com/content/pdf/10.1007/s40747-024-01595-w.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100361625","display_name":"Jun Li","orcid":"https://orcid.org/0000-0001-5845-8602"},"institutions":[{"id":"https://openalex.org/I41198531","display_name":"Nanjing University of Posts and Telecommunications","ror":"https://ror.org/043bpky34","country_code":"CN","type":"education","lineage":["https://openalex.org/I41198531"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jun Li","raw_affiliation_strings":["College of Electronic and Optical Engineering and College of Flexible Electronics (Future Technology), Nanjing University of Posts and Telecommunications, Nanjing, 210023, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"College of Electronic and Optical Engineering and College of Flexible Electronics (Future Technology), Nanjing University of Posts and Telecommunications, Nanjing, 210023, China","institution_ids":["https://openalex.org/I41198531"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101608667","display_name":"Yuchen Zhu","orcid":"https://orcid.org/0000-0003-1114-2293"},"institutions":[{"id":"https://openalex.org/I41198531","display_name":"Nanjing University of Posts and Telecommunications","ror":"https://ror.org/043bpky34","country_code":"CN","type":"education","lineage":["https://openalex.org/I41198531"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuchen Zhu","raw_affiliation_strings":["College of Electronic and Optical Engineering and College of Flexible Electronics (Future Technology), Nanjing University of Posts and Telecommunications, Nanjing, 210023, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"College of Electronic and Optical Engineering and College of Flexible Electronics (Future Technology), Nanjing University of Posts and Telecommunications, Nanjing, 210023, China","institution_ids":["https://openalex.org/I41198531"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5017947016","display_name":"Kexue Sun","orcid":"https://orcid.org/0000-0002-7045-3182"},"institutions":[{"id":"https://openalex.org/I41198531","display_name":"Nanjing University of Posts and Telecommunications","ror":"https://ror.org/043bpky34","country_code":"CN","type":"education","lineage":["https://openalex.org/I41198531"]},{"id":"https://openalex.org/I4210113609","display_name":"Nation University","ror":"https://ror.org/01wacaj92","country_code":"TH","type":"education","lineage":["https://openalex.org/I4210113609"]}],"countries":["CN","TH"],"is_corresponding":false,"raw_author_name":"Kexue Sun","raw_affiliation_strings":["College of Electronic and Optical Engineering and College of Flexible Electronics (Future Technology), Nanjing University of Posts and Telecommunications, Nanjing, 210023, China","Nation\u2013Local Joint Project Engineering Laboratory of RF Integration and Micropackage, Nanjing, 210023, China"],"raw_orcid":"https://orcid.org/0000-0002-7045-3182","affiliations":[{"raw_affiliation_string":"College of Electronic and Optical Engineering and College of Flexible Electronics (Future Technology), Nanjing University of Posts and Telecommunications, Nanjing, 210023, China","institution_ids":["https://openalex.org/I41198531"]},{"raw_affiliation_string":"Nation\u2013Local Joint Project Engineering Laboratory of RF Integration and Micropackage, Nanjing, 210023, China","institution_ids":["https://openalex.org/I4210113609"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5100361625"],"corresponding_institution_ids":["https://openalex.org/I41198531"],"apc_list":{"value":1320,"currency":"GBP","value_usd":1619},"apc_paid":{"value":1320,"currency":"GBP","value_usd":1619},"fwci":0.4762,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.62880859,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":"10","issue":"6","first_page":"7863","last_page":"7875"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9824000000953674,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9824000000953674,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11222","display_name":"Magnetic Properties and Applications","score":0.9782000184059143,"subfield":{"id":"https://openalex.org/subfields/2504","display_name":"Electronic, Optical and Magnetic Materials"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11052","display_name":"Energy Load and Power Forecasting","score":0.9781000018119812,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/conjugate-gradient-method","display_name":"Conjugate gradient method","score":0.80098557472229},{"id":"https://openalex.org/keywords/computational-intelligence","display_name":"Computational intelligence","score":0.6498212814331055},{"id":"https://openalex.org/keywords/scheme","display_name":"Scheme (mathematics)","score":0.5713719129562378},{"id":"https://openalex.org/keywords/conjugate","display_name":"Conjugate","score":0.5509881973266602},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5470768213272095},{"id":"https://openalex.org/keywords/applied-mathematics","display_name":"Applied mathematics","score":0.5032753348350525},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.4629759192466736},{"id":"https://openalex.org/keywords/pruning","display_name":"Pruning","score":0.445279598236084},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.42669156193733215},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.36472606658935547},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.34528398513793945},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2439415156841278},{"id":"https://openalex.org/keywords/mathematical-analysis","display_name":"Mathematical analysis","score":0.17060983180999756},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.12770849466323853},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.08643317222595215}],"concepts":[{"id":"https://openalex.org/C81184566","wikidata":"https://www.wikidata.org/wiki/Q1191895","display_name":"Conjugate gradient method","level":2,"score":0.80098557472229},{"id":"https://openalex.org/C139502532","wikidata":"https://www.wikidata.org/wiki/Q1122090","display_name":"Computational intelligence","level":2,"score":0.6498212814331055},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.5713719129562378},{"id":"https://openalex.org/C197336794","wikidata":"https://www.wikidata.org/wiki/Q5161150","display_name":"Conjugate","level":2,"score":0.5509881973266602},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5470768213272095},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.5032753348350525},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.4629759192466736},{"id":"https://openalex.org/C108010975","wikidata":"https://www.wikidata.org/wiki/Q500094","display_name":"Pruning","level":2,"score":0.445279598236084},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.42669156193733215},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.36472606658935547},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.34528398513793945},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2439415156841278},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.17060983180999756},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.12770849466323853},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.08643317222595215},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C6557445","wikidata":"https://www.wikidata.org/wiki/Q173113","display_name":"Agronomy","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1007/s40747-024-01595-w","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s40747-024-01595-w","pdf_url":"https://link.springer.com/content/pdf/10.1007/s40747-024-01595-w.pdf","source":{"id":"https://openalex.org/S3035462843","display_name":"Complex & Intelligent Systems","issn_l":"2198-6053","issn":["2198-6053","2199-4536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Complex &amp; Intelligent Systems","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:c357960ba562483a9b42cd02800ef899","is_oa":true,"landing_page_url":"https://doaj.org/article/c357960ba562483a9b42cd02800ef899","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Complex & Intelligent Systems, Vol 10, Iss 6, Pp 7863-7875 (2024)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1007/s40747-024-01595-w","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s40747-024-01595-w","pdf_url":"https://link.springer.com/content/pdf/10.1007/s40747-024-01595-w.pdf","source":{"id":"https://openalex.org/S3035462843","display_name":"Complex & Intelligent Systems","issn_l":"2198-6053","issn":["2198-6053","2199-4536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Complex &amp; Intelligent Systems","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.4300000071525574,"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7"}],"awards":[{"id":"https://openalex.org/G6008101445","display_name":null,"funder_award_id":"202310293152E","funder_id":"https://openalex.org/F4320336008","funder_display_name":"National College Students Innovation and Entrepreneurship Training Program"}],"funders":[{"id":"https://openalex.org/F4320336008","display_name":"National College Students Innovation and Entrepreneurship Training Program","ror":null}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4401404075.pdf","grobid_xml":"https://content.openalex.org/works/W4401404075.grobid-xml"},"referenced_works_count":36,"referenced_works":["https://openalex.org/W2012231377","https://openalex.org/W2783538964","https://openalex.org/W2965862774","https://openalex.org/W2981857663","https://openalex.org/W3019166713","https://openalex.org/W3035251378","https://openalex.org/W3039554467","https://openalex.org/W3135148659","https://openalex.org/W3138516171","https://openalex.org/W3158376750","https://openalex.org/W3162462834","https://openalex.org/W3171125843","https://openalex.org/W3177232285","https://openalex.org/W3186001839","https://openalex.org/W3208552992","https://openalex.org/W3211127556","https://openalex.org/W4200046541","https://openalex.org/W4206634569","https://openalex.org/W4206706211","https://openalex.org/W4213019189","https://openalex.org/W4281489207","https://openalex.org/W4283275666","https://openalex.org/W4283815570","https://openalex.org/W4288735114","https://openalex.org/W4306680578","https://openalex.org/W4375928954","https://openalex.org/W4379985891","https://openalex.org/W4385573119","https://openalex.org/W4386083031","https://openalex.org/W4386826409","https://openalex.org/W4389104738","https://openalex.org/W4390872327","https://openalex.org/W4391136507","https://openalex.org/W4392698767","https://openalex.org/W6600504320","https://openalex.org/W6702248584"],"related_works":["https://openalex.org/W2143004018","https://openalex.org/W2330362314","https://openalex.org/W2435669318","https://openalex.org/W1995639836","https://openalex.org/W2103568534","https://openalex.org/W2057862416","https://openalex.org/W2754988198","https://openalex.org/W1543576583","https://openalex.org/W2290850064","https://openalex.org/W1503783781"],"abstract_inverted_index":{"Pre-trained":[0],"models":[1,42,57],"based":[2],"on":[3,170,181,204],"the":[4,12,50,62,71,92,113,122,131,144,148,171,176,182],"Transformer":[5],"architecture":[6],"have":[7],"significantly":[8,135],"advanced":[9],"research":[10],"within":[11],"domain":[13],"of":[14,55,65,100,121,147,158,189,200],"Natural":[15],"Language":[16],"Processing":[17],"(NLP)":[18],"due":[19],"to":[20,112,191,202],"their":[21],"superior":[22],"performance":[23,180],"and":[24,84,161,173,196],"extensive":[25],"applicability":[26],"across":[27],"multiple":[28],"technological":[29],"sectors.":[30],"Despite":[31],"these":[32,41],"advantages,":[33],"there":[34],"is":[35,110],"a":[36,82,98,155,187,197],"significant":[37],"challenge":[38],"in":[39,61,91,193],"optimizing":[40],"for":[43],"more":[44],"efficient":[45,85],"deployment.":[46],"To":[47,75],"be":[48],"concrete,":[49],"existing":[51],"post-training":[52],"pruning":[53,66,73,93,149,167,194],"frameworks":[54],"transformer":[56],"suffer":[58],"from":[59],"inefficiencies":[60],"crucial":[63],"stage":[64],"accuracy":[67],"recovery,":[68],"which":[69,116],"impacts":[70],"overall":[72],"efficiency.":[74,138],"address":[76],"this":[77,79,104],"issue,":[78],"paper":[80],"introduces":[81],"novel":[83],"iteration":[86,127],"scheme":[87],"with":[88],"conjugate":[89,101,140],"gradient":[90],"recovery":[94],"stage.":[95],"By":[96],"constructing":[97],"series":[99],"iterative":[102],"directions,":[103],"approach":[105],"ensures":[106],"each":[107,126],"optimization":[108],"step":[109],"orthogonal":[111],"previous":[114],"ones,":[115],"effectively":[117,129],"reduces":[118,143],"redundant":[119],"explorations":[120],"search":[123,137],"space.":[124],"Consequently,":[125],"progresses":[128],"towards":[130],"global":[132],"optimum,":[133],"thereby":[134],"enhancing":[136],"The":[139],"gradient-based":[141],"faster-pruner":[142,177],"time":[145,195],"expenditure":[146],"process":[150],"while":[151],"maintaining":[152],"accuracy,":[153],"demonstrating":[154],"high":[156],"degree":[157],"solution":[159],"stability":[160],"exceptional":[162],"model":[163],"acceleration":[164],"effects.":[165],"In":[166],"experiments":[168],"conducted":[169],"BERTBASE":[172],"DistilBERT":[174],"models,":[175],"exhibited":[178],"outstanding":[179],"GLUE":[183],"benchmark":[184],"dataset,":[185],"achieving":[186],"reduction":[188],"up":[190,201],"36.27%":[192],"speed":[198],"increase":[199],"1.45\u00d7":[203],"an":[205],"RTX":[206],"3090":[207],"GPU.":[208]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2026-03-12T06:13:28.667946","created_date":"2025-10-10T00:00:00"}
