{"id":"https://openalex.org/W4405709360","doi":"https://doi.org/10.1109/iscslp63861.2024.10800306","title":"An Optimizer for Conformer Based on Conjugate Gradient Method","display_name":"An Optimizer for Conformer Based on Conjugate Gradient Method","publication_year":2024,"publication_date":"2024-11-07","ids":{"openalex":"https://openalex.org/W4405709360","doi":"https://doi.org/10.1109/iscslp63861.2024.10800306"},"language":"en","primary_location":{"id":"doi:10.1109/iscslp63861.2024.10800306","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iscslp63861.2024.10800306","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE 14th International Symposium on Chinese Spoken Language Processing (ISCSLP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5109693003","display_name":"Wenyi Yu","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Wenyi Yu","raw_affiliation_strings":["Tsinghua University,Department of Electronic Engineering"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,Department of Electronic Engineering","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100460071","display_name":"Chao Zhang","orcid":"https://orcid.org/0000-0001-6357-1095"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chao Zhang","raw_affiliation_strings":["Tsinghua University,Department of Electronic Engineering"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,Department of Electronic Engineering","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5109693003"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.21952304,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T14158","display_name":"Optical Systems and Laser Technology","score":0.9204999804496765,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T14158","display_name":"Optical Systems and Laser Technology","score":0.9204999804496765,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/conjugate-gradient-method","display_name":"Conjugate gradient method","score":0.7568415999412537},{"id":"https://openalex.org/keywords/conformational-isomerism","display_name":"Conformational isomerism","score":0.5946144461631775},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5418544411659241},{"id":"https://openalex.org/keywords/conjugate","display_name":"Conjugate","score":0.5317857265472412},{"id":"https://openalex.org/keywords/nonlinear-conjugate-gradient-method","display_name":"Nonlinear conjugate gradient method","score":0.4205498695373535},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.3822164237499237},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.29082274436950684},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.26914680004119873},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.24104180932044983},{"id":"https://openalex.org/keywords/gradient-descent","display_name":"Gradient descent","score":0.2129743993282318},{"id":"https://openalex.org/keywords/chemistry","display_name":"Chemistry","score":0.13939476013183594},{"id":"https://openalex.org/keywords/mathematical-analysis","display_name":"Mathematical analysis","score":0.09594801068305969},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.08039137721061707},{"id":"https://openalex.org/keywords/molecule","display_name":"Molecule","score":0.05828329920768738}],"concepts":[{"id":"https://openalex.org/C81184566","wikidata":"https://www.wikidata.org/wiki/Q1191895","display_name":"Conjugate gradient method","level":2,"score":0.7568415999412537},{"id":"https://openalex.org/C18705241","wikidata":"https://www.wikidata.org/wiki/Q1128023","display_name":"Conformational isomerism","level":3,"score":0.5946144461631775},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5418544411659241},{"id":"https://openalex.org/C197336794","wikidata":"https://www.wikidata.org/wiki/Q5161150","display_name":"Conjugate","level":2,"score":0.5317857265472412},{"id":"https://openalex.org/C26362088","wikidata":"https://www.wikidata.org/wiki/Q17086453","display_name":"Nonlinear conjugate gradient method","level":4,"score":0.4205498695373535},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.3822164237499237},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.29082274436950684},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.26914680004119873},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.24104180932044983},{"id":"https://openalex.org/C153258448","wikidata":"https://www.wikidata.org/wiki/Q1199743","display_name":"Gradient descent","level":3,"score":0.2129743993282318},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.13939476013183594},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.09594801068305969},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.08039137721061707},{"id":"https://openalex.org/C32909587","wikidata":"https://www.wikidata.org/wiki/Q11369","display_name":"Molecule","level":2,"score":0.05828329920768738},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iscslp63861.2024.10800306","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iscslp63861.2024.10800306","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE 14th International Symposium on Chinese Spoken Language Processing (ISCSLP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W1632114991","https://openalex.org/W1836465849","https://openalex.org/W2006903949","https://openalex.org/W2064675550","https://openalex.org/W2125336414","https://openalex.org/W2130984546","https://openalex.org/W2963855604","https://openalex.org/W2964110616","https://openalex.org/W3097777922","https://openalex.org/W3169062464","https://openalex.org/W6608133726","https://openalex.org/W6628131027","https://openalex.org/W6631190155","https://openalex.org/W6638018090","https://openalex.org/W6674634876","https://openalex.org/W6683107984","https://openalex.org/W6684497637","https://openalex.org/W6748982233","https://openalex.org/W6752294818","https://openalex.org/W6770934496","https://openalex.org/W6778883912","https://openalex.org/W6780226713","https://openalex.org/W6791581255","https://openalex.org/W6796088036"],"related_works":["https://openalex.org/W2098528027","https://openalex.org/W2375129592","https://openalex.org/W2060532089","https://openalex.org/W3159557833","https://openalex.org/W2005717169","https://openalex.org/W2183734858","https://openalex.org/W4387168483","https://openalex.org/W4319027779","https://openalex.org/W2941778027","https://openalex.org/W1503783781"],"abstract_inverted_index":{"Optimization":[0],"is":[1,70,121],"a":[2,137],"fundamental":[3],"problem":[4],"in":[5,22,41,179],"AI":[6],"research.":[7],"Widely":[8],"used":[9],"first-order":[10],"optimization":[11,43,125],"methods":[12,65],"like":[13],"SGD":[14],"and":[15,31,50,57,108,127,136,150,164,188],"Adam":[16],"have":[17],"drawbacks,":[18],"such":[19],"as":[20],"difficulty":[21],"hyper-parameter":[23],"selection,":[24],"poor":[25],"performance":[26,126,185,195],"on":[27,76,146,184],"\u201cill-conditioned\u201d":[28],"loss":[29],"landscapes,":[30],"bottle-neck":[32],"of":[33,59,162,172,175],"efficiency":[34,161],"improvement.":[35],"This":[36,91],"has":[37],"led":[38],"to":[39,54,66,95,123],"interest":[40],"\u201csecond-order\u201d":[42,88],"methods,":[44],"which,":[45],"however,":[46],"face":[47],"high":[48],"space":[49],"time":[51],"complexity":[52],"due":[53],"the":[55,77,87,96,112,147,160,165,170,173,180,189],"storage":[56],"inversion":[58],"second-order":[60,64],"matrices.":[61],"Designing":[62],"practical":[63],"overcome":[67],"these":[68,83],"complexities":[69],"crucial.":[71],"The":[72],"Hessian-free":[73],"method":[74,80],"based":[75],"conjugate":[78],"gradient":[79],"(HFCG)":[81],"addresses":[82],"issues":[84],"by":[85,198],"approximating":[86],"updates":[89],"iteratively.":[90],"paper":[92],"extends":[93],"HFCG":[94,163,192],"Conformer":[97],"model,":[98],"implementing":[99],"directional":[100],"derivative":[101],"calculations":[102],"for":[103,111,153],"multi-head":[104],"self-attention,":[105],"convolution,":[106],"LayerNorm,":[107],"BatchNorm":[109],"modules":[110],"first":[113],"time.":[114],"A":[115],"two-stage":[116],"distributed":[117],"training":[118,129],"procedure":[119],"(TDTP)":[120],"introduced":[122],"enhance":[124],"reduce":[128,194],"time,":[130],"including":[131],"an":[132],"improved":[133],"update":[134],"strategy":[135],"new":[138,166],"CG":[139],"batch":[140],"extraction":[141],"method.":[142],"TDTP":[143,167],"was":[144,186,200],"tested":[145],"Penn":[148],"Treebank":[149],"AMI":[151],"datasets":[152],"language":[154],"modelling":[155],"tasks.":[156],"Ablation":[157],"experiments":[158],"demonstrated":[159],"strate-gies.":[168],"Additionally,":[169],"impact":[171],"choice":[174],"<tex":[176],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[177],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$\\mathbf{H}_{L}$</tex>":[178],"generalized":[181],"Gauss-Newton":[182],"matrix":[183],"explored":[187],"hypothesis":[190],"that":[191],"might":[193],"differences":[196],"caused":[197],"normalization":[199],"proposed.":[201]},"counts_by_year":[],"updated_date":"2025-12-21T23:12:01.093139","created_date":"2025-10-10T00:00:00"}
