{"id":"https://openalex.org/W7127197279","doi":"https://doi.org/10.1007/s42979-025-04704-9","title":"Zeroth-Order Kronecker Optimization for Pretraining Language Models","display_name":"Zeroth-Order Kronecker Optimization for Pretraining Language Models","publication_year":2026,"publication_date":"2026-02-02","ids":{"openalex":"https://openalex.org/W7127197279","doi":"https://doi.org/10.1007/s42979-025-04704-9"},"language":"en","primary_location":{"id":"doi:10.1007/s42979-025-04704-9","is_oa":false,"landing_page_url":"https://doi.org/10.1007/s42979-025-04704-9","pdf_url":null,"source":{"id":"https://openalex.org/S4210174798","display_name":"SN Computer Science","issn_l":"2661-8907","issn":["2661-8907","2662-995X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319965","host_organization_name":"Springer Nature","host_organization_lineage":["https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Nature"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"SN Computer Science","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5116465563","display_name":"Nathan Allaire","orcid":null},"institutions":[{"id":"https://openalex.org/I4210133247","display_name":"Group for Research in Decision Analysis","ror":"https://ror.org/02pkvpx84","country_code":"CA","type":"facility","lineage":["https://openalex.org/I108192572","https://openalex.org/I159129438","https://openalex.org/I4210133247","https://openalex.org/I45683168","https://openalex.org/I49663120","https://openalex.org/I5023651"]},{"id":"https://openalex.org/I45683168","display_name":"Polytechnique Montr\u00e9al","ror":"https://ror.org/05f8d4e86","country_code":"CA","type":"education","lineage":["https://openalex.org/I45683168"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Nathan Allaire","raw_affiliation_strings":["D\u00e9partement de Math\u00e9matiques et G\u00e9nie Industriel, Polytechnique Montr\u00e9al, 2500 Chemin de Polytechnique, Montr\u00e9al, QC, H3T 0A3, Canada","GERAD, 2920 Chemin de la Tour, Montr\u00e9al, QC, H3T 1N8, Canada"],"raw_orcid":"https://orcid.org/0009-0006-0694-8216","affiliations":[{"raw_affiliation_string":"D\u00e9partement de Math\u00e9matiques et G\u00e9nie Industriel, Polytechnique Montr\u00e9al, 2500 Chemin de Polytechnique, Montr\u00e9al, QC, H3T 0A3, Canada","institution_ids":["https://openalex.org/I45683168"]},{"raw_affiliation_string":"GERAD, 2920 Chemin de la Tour, Montr\u00e9al, QC, H3T 1N8, Canada","institution_ids":["https://openalex.org/I4210133247"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022173011","display_name":"S\u00e9bastien Le Digabel","orcid":"https://orcid.org/0000-0003-3148-5090"},"institutions":[{"id":"https://openalex.org/I4210133247","display_name":"Group for Research in Decision Analysis","ror":"https://ror.org/02pkvpx84","country_code":"CA","type":"facility","lineage":["https://openalex.org/I108192572","https://openalex.org/I159129438","https://openalex.org/I4210133247","https://openalex.org/I45683168","https://openalex.org/I49663120","https://openalex.org/I5023651"]},{"id":"https://openalex.org/I45683168","display_name":"Polytechnique Montr\u00e9al","ror":"https://ror.org/05f8d4e86","country_code":"CA","type":"education","lineage":["https://openalex.org/I45683168"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"S\u00e9bastien Le Digabel","raw_affiliation_strings":["D\u00e9partement de Math\u00e9matiques et G\u00e9nie Industriel, Polytechnique Montr\u00e9al, 2500 Chemin de Polytechnique, Montr\u00e9al, QC, H3T 0A3, Canada","GERAD, 2920 Chemin de la Tour, Montr\u00e9al, QC, H3T 1N8, Canada"],"raw_orcid":"https://orcid.org/0000-0003-3148-5090","affiliations":[{"raw_affiliation_string":"D\u00e9partement de Math\u00e9matiques et G\u00e9nie Industriel, Polytechnique Montr\u00e9al, 2500 Chemin de Polytechnique, Montr\u00e9al, QC, H3T 0A3, Canada","institution_ids":["https://openalex.org/I45683168"]},{"raw_affiliation_string":"GERAD, 2920 Chemin de la Tour, Montr\u00e9al, QC, H3T 1N8, Canada","institution_ids":["https://openalex.org/I4210133247"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122981846","display_name":"Dominique Orban","orcid":null},"institutions":[{"id":"https://openalex.org/I4210133247","display_name":"Group for Research in Decision Analysis","ror":"https://ror.org/02pkvpx84","country_code":"CA","type":"facility","lineage":["https://openalex.org/I108192572","https://openalex.org/I159129438","https://openalex.org/I4210133247","https://openalex.org/I45683168","https://openalex.org/I49663120","https://openalex.org/I5023651"]},{"id":"https://openalex.org/I45683168","display_name":"Polytechnique Montr\u00e9al","ror":"https://ror.org/05f8d4e86","country_code":"CA","type":"education","lineage":["https://openalex.org/I45683168"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Dominique Orban","raw_affiliation_strings":["D\u00e9partement de Math\u00e9matiques et G\u00e9nie Industriel, Polytechnique Montr\u00e9al, 2500 Chemin de Polytechnique, Montr\u00e9al, QC, H3T 0A3, Canada","GERAD, 2920 Chemin de la Tour, Montr\u00e9al, QC, H3T 1N8, Canada"],"raw_orcid":"https://orcid.org/0000-0002-8017-7687","affiliations":[{"raw_affiliation_string":"D\u00e9partement de Math\u00e9matiques et G\u00e9nie Industriel, Polytechnique Montr\u00e9al, 2500 Chemin de Polytechnique, Montr\u00e9al, QC, H3T 0A3, Canada","institution_ids":["https://openalex.org/I45683168"]},{"raw_affiliation_string":"GERAD, 2920 Chemin de la Tour, Montr\u00e9al, QC, H3T 1N8, Canada","institution_ids":["https://openalex.org/I4210133247"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5052855957","display_name":"Vahid Partovi Nia","orcid":"https://orcid.org/0000-0001-6673-4224"},"institutions":[{"id":"https://openalex.org/I4210115038","display_name":"Huawei Technologies (Canada)","ror":"https://ror.org/026venb53","country_code":"CA","type":"company","lineage":["https://openalex.org/I2250955327","https://openalex.org/I4210115038"]},{"id":"https://openalex.org/I4210133247","display_name":"Group for Research in Decision Analysis","ror":"https://ror.org/02pkvpx84","country_code":"CA","type":"facility","lineage":["https://openalex.org/I108192572","https://openalex.org/I159129438","https://openalex.org/I4210133247","https://openalex.org/I45683168","https://openalex.org/I49663120","https://openalex.org/I5023651"]},{"id":"https://openalex.org/I4210159102","display_name":"Huawei Technologies (Sweden)","ror":"https://ror.org/0500fyd17","country_code":"SE","type":"company","lineage":["https://openalex.org/I2250955327","https://openalex.org/I4210159102"]},{"id":"https://openalex.org/I45683168","display_name":"Polytechnique Montr\u00e9al","ror":"https://ror.org/05f8d4e86","country_code":"CA","type":"education","lineage":["https://openalex.org/I45683168"]}],"countries":["CA","SE"],"is_corresponding":false,"raw_author_name":"Vahid Partovi Nia","raw_affiliation_strings":["D\u00e9partement de Math\u00e9matiques et G\u00e9nie Industriel, Polytechnique Montr\u00e9al, 2500 Chemin de Polytechnique, Montr\u00e9al, QC, H3T 0A3, Canada","GERAD, 2920 Chemin de la Tour, Montr\u00e9al, QC, H3T 1N8, Canada","Huawei Noah\u2019s Ark Lab, 7101 Avenue du Parc, Montr\u00e9al, QC, H3N 1X9, Canada"],"raw_orcid":"https://orcid.org/0000-0001-6673-4224","affiliations":[{"raw_affiliation_string":"D\u00e9partement de Math\u00e9matiques et G\u00e9nie Industriel, Polytechnique Montr\u00e9al, 2500 Chemin de Polytechnique, Montr\u00e9al, QC, H3T 0A3, Canada","institution_ids":["https://openalex.org/I45683168"]},{"raw_affiliation_string":"GERAD, 2920 Chemin de la Tour, Montr\u00e9al, QC, H3T 1N8, Canada","institution_ids":["https://openalex.org/I4210133247"]},{"raw_affiliation_string":"Huawei Noah\u2019s Ark Lab, 7101 Avenue du Parc, Montr\u00e9al, QC, H3N 1X9, Canada","institution_ids":["https://openalex.org/I4210159102","https://openalex.org/I4210115038"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5116465563"],"corresponding_institution_ids":["https://openalex.org/I4210133247","https://openalex.org/I45683168"],"apc_list":{"value":2290,"currency":"EUR","value_usd":2890},"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.19602707,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"7","issue":"2","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.14139999449253082,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.14139999449253082,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.08720000088214874,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.07829999923706055,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.5666999816894531},{"id":"https://openalex.org/keywords/subspace-topology","display_name":"Subspace topology","score":0.5285999774932861},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.5248000025749207},{"id":"https://openalex.org/keywords/layer","display_name":"Layer (electronics)","score":0.4593000113964081},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.36230000853538513},{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.35199999809265137},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.3456000089645386},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.33009999990463257},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3248000144958496}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7067000269889832},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.5666999816894531},{"id":"https://openalex.org/C32834561","wikidata":"https://www.wikidata.org/wiki/Q660730","display_name":"Subspace topology","level":2,"score":0.5285999774932861},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.5248000025749207},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.491100013256073},{"id":"https://openalex.org/C2779227376","wikidata":"https://www.wikidata.org/wiki/Q6505497","display_name":"Layer (electronics)","level":2,"score":0.4593000113964081},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.36230000853538513},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.35199999809265137},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3508000075817108},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.3456000089645386},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.33009999990463257},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3248000144958496},{"id":"https://openalex.org/C33676613","wikidata":"https://www.wikidata.org/wiki/Q13415176","display_name":"Dimension (graph theory)","level":2,"score":0.32260000705718994},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.32109999656677246},{"id":"https://openalex.org/C180016635","wikidata":"https://www.wikidata.org/wiki/Q2712821","display_name":"Compression (physics)","level":2,"score":0.319599986076355},{"id":"https://openalex.org/C149629883","wikidata":"https://www.wikidata.org/wiki/Q660926","display_name":"Fraction (chemistry)","level":2,"score":0.3167000114917755},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3138999938964844},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.3093999922275543},{"id":"https://openalex.org/C45347329","wikidata":"https://www.wikidata.org/wiki/Q5166604","display_name":"Convolution (computer science)","level":3,"score":0.3082999885082245},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.2904999852180481},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.28299999237060547},{"id":"https://openalex.org/C39482219","wikidata":"https://www.wikidata.org/wiki/Q192826","display_name":"Kronecker delta","level":2,"score":0.2791999876499176},{"id":"https://openalex.org/C2780428219","wikidata":"https://www.wikidata.org/wiki/Q16952335","display_name":"Cover (algebra)","level":2,"score":0.2782999873161316},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.26840001344680786},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2669999897480011},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.2637999951839447},{"id":"https://openalex.org/C2776207758","wikidata":"https://www.wikidata.org/wiki/Q5303302","display_name":"Downstream (manufacturing)","level":2,"score":0.2635999917984009},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.26269999146461487},{"id":"https://openalex.org/C200873422","wikidata":"https://www.wikidata.org/wiki/Q5448821","display_name":"Filling-in","level":2,"score":0.2533000111579895},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.2529999911785126},{"id":"https://openalex.org/C4679612","wikidata":"https://www.wikidata.org/wiki/Q866298","display_name":"Aggregate (composite)","level":2,"score":0.25110000371932983}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1007/s42979-025-04704-9","is_oa":false,"landing_page_url":"https://doi.org/10.1007/s42979-025-04704-9","pdf_url":null,"source":{"id":"https://openalex.org/S4210174798","display_name":"SN Computer Science","issn_l":"2661-8907","issn":["2661-8907","2662-995X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319965","host_organization_name":"Springer Nature","host_organization_lineage":["https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Nature"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"SN Computer Science","raw_type":"journal-article"},{"id":"pmh:oai:publications.polymtl.ca:72402","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4306401013","display_name":"PolyPublie (\u00c9cole Polytechnique de Montr\u00e9al)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I45683168","host_organization_name":"Polytechnique Montr\u00e9al","host_organization_lineage":["https://openalex.org/I45683168"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":null,"raw_type":"PeerReviewed"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1268051983","display_name":null,"funder_award_id":"RGPIN-2025-06911","funder_id":"https://openalex.org/F4320334593","funder_display_name":"Natural Sciences and Engineering Research Council of Canada"},{"id":"https://openalex.org/G4827217","display_name":null,"funder_award_id":"RGPIN- 2024-05086","funder_id":"https://openalex.org/F4320334593","funder_display_name":"Natural Sciences and Engineering Research Council of Canada"},{"id":"https://openalex.org/G5095711811","display_name":null,"funder_award_id":"RGPIN-2020-06535","funder_id":"https://openalex.org/F4320334593","funder_display_name":"Natural Sciences and Engineering Research Council of Canada"}],"funders":[{"id":"https://openalex.org/F4320334593","display_name":"Natural Sciences and Engineering Research Council of Canada","ror":"https://ror.org/01h531d29"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":12,"referenced_works":["https://openalex.org/W1979761486","https://openalex.org/W1998855065","https://openalex.org/W2034996255","https://openalex.org/W2124289529","https://openalex.org/W2149479912","https://openalex.org/W2766284800","https://openalex.org/W2775085189","https://openalex.org/W2963470657","https://openalex.org/W2963711014","https://openalex.org/W2981892732","https://openalex.org/W3034238904","https://openalex.org/W4408062977"],"related_works":[],"abstract_inverted_index":null,"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2026-02-03T00:00:00"}
