{"id":"https://openalex.org/W4412876893","doi":"https://doi.org/10.1145/3711896.3737008","title":"Reinvent the Operation not the Architecture: Quantum-inspired High-order Product for Compatible and Improved LLMs Training","display_name":"Reinvent the Operation not the Architecture: Quantum-inspired High-order Product for Compatible and Improved LLMs Training","publication_year":2025,"publication_date":"2025-08-03","ids":{"openalex":"https://openalex.org/W4412876893","doi":"https://doi.org/10.1145/3711896.3737008"},"language":"en","primary_location":{"id":"doi:10.1145/3711896.3737008","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3711896.3737008","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3711896.3737008","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining V.2","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3711896.3737008","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101800674","display_name":"Hao Xiong","orcid":"https://orcid.org/0000-0002-5605-066X"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Hao Xiong","raw_affiliation_strings":["School of Computer Science, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115596819","display_name":"Y. Yang","orcid":"https://orcid.org/0009-0004-2090-7459"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yebin Yang","raw_affiliation_strings":["School of Artificial Intelligence, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071536418","display_name":"Hongbing Wu","orcid":"https://orcid.org/0009-0005-2626-3460"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huaijin Wu","raw_affiliation_strings":["School of Computer Science, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113711863","display_name":"Xu Zhong","orcid":"https://orcid.org/0009-0008-2349-816X"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaoqiu Zhong","raw_affiliation_strings":["School of Artificial Intelligence, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028007934","display_name":"Yehui Tang","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yehui Tang","raw_affiliation_strings":["School of Computer Science, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035553652","display_name":"Zhenwei Xia","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhuo Xia","raw_affiliation_strings":["School of Computer Science, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044186070","display_name":"Xiaoxing Wang","orcid":"https://orcid.org/0000-0002-7830-9521"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaoxing Wang","raw_affiliation_strings":["School of Artificial Intelligence, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5087158377","display_name":"Junchi Yan","orcid":"https://orcid.org/0000-0001-9639-7679"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Junchi Yan","raw_affiliation_strings":["School of Artificial Intelligence, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5101800674"],"corresponding_institution_ids":["https://openalex.org/I183067930"],"apc_list":null,"apc_paid":null,"fwci":4.1786,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.947593,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"3356","last_page":"3365"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.935699999332428,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.935699999332428,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/order","display_name":"Order (exchange)","score":0.6150478720664978},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.5758975744247437},{"id":"https://openalex.org/keywords/product","display_name":"Product (mathematics)","score":0.5698802471160889},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.5579518675804138},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.496481716632843},{"id":"https://openalex.org/keywords/business","display_name":"Business","score":0.2707303762435913},{"id":"https://openalex.org/keywords/history","display_name":"History","score":0.16157329082489014},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.13418814539909363},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.10587584972381592}],"concepts":[{"id":"https://openalex.org/C182306322","wikidata":"https://www.wikidata.org/wiki/Q1779371","display_name":"Order (exchange)","level":2,"score":0.6150478720664978},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.5758975744247437},{"id":"https://openalex.org/C90673727","wikidata":"https://www.wikidata.org/wiki/Q901718","display_name":"Product (mathematics)","level":2,"score":0.5698802471160889},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.5579518675804138},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.496481716632843},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.2707303762435913},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.16157329082489014},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.13418814539909363},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.10587584972381592},{"id":"https://openalex.org/C10138342","wikidata":"https://www.wikidata.org/wiki/Q43015","display_name":"Finance","level":1,"score":0.0},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3711896.3737008","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3711896.3737008","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3711896.3737008","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining V.2","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3711896.3737008","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3711896.3737008","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3711896.3737008","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining V.2","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9","score":0.4399999976158142}],"awards":[{"id":"https://openalex.org/G187169856","display_name":null,"funder_award_id":"62222607","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G391238517","display_name":null,"funder_award_id":", and","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6145335955","display_name":null,"funder_award_id":"22607","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4412876893.pdf","grobid_xml":"https://content.openalex.org/works/W4412876893.grobid-xml"},"referenced_works_count":17,"referenced_works":["https://openalex.org/W2890894339","https://openalex.org/W2970750583","https://openalex.org/W2986943082","https://openalex.org/W2990961515","https://openalex.org/W2998617917","https://openalex.org/W2999254124","https://openalex.org/W3033529678","https://openalex.org/W3129831491","https://openalex.org/W3175764193","https://openalex.org/W3194676777","https://openalex.org/W3214598513","https://openalex.org/W4281567728","https://openalex.org/W4295838474","https://openalex.org/W4388979610","https://openalex.org/W6600109629","https://openalex.org/W6601692187","https://openalex.org/W6893007900"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W230091440","https://openalex.org/W2390279801","https://openalex.org/W2233261550","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2775171296"],"abstract_inverted_index":{"We":[0,35],"rethink":[1],"the":[2,21,37,52,72,84,116,161,169],"basic":[3],"operations,":[4,94],"i.e.,":[5,98],"inner":[6],"product":[7],"and":[8,51,83,134,154,176],"matrix":[9],"multiplication":[10],"used":[11],"in":[12,71,75,165],"neural":[13],"networks.":[14],"A":[15],"quantum-inspired":[16],"alternative":[17],"is":[18,56],"proposed,":[19],"utilizing":[20],"power":[22],"of":[23,32,141,163],"high-dimensional":[24],"Hilbert":[25],"space":[26],"by":[27,113,123],"devising":[28],"a":[29,110],"high-order":[30,44],"form":[31],"tensor":[33],"product.":[34],"re-parameterize":[36],"original":[38],"(low-order)":[39],"vectors/matrices":[40],"into":[41],"an":[42,62],"expressive":[43],"form,":[45],"without":[46],"incurring":[47],"extra":[48,53],"model":[49,112],"parameters,":[50],"computational":[54],"overhead":[55],"negligible":[57],"(e.g.,":[58],"about":[59],"2%).":[60],"As":[61,121],"in-place":[63],"transparent":[64],"atomic":[65],"operation,":[66],"we":[67,95],"show":[68],"its":[69,88],"use":[70],"key":[73],"components":[74],"Transformers:":[76],"token":[77],"embeddings,":[78],"attentions":[79],"(query,":[80],"key,":[81],"value)":[82],"MLP.":[85],"Due":[86],"to":[87,91,108,168],"inherent":[89],"compatibility":[90],"vanilla":[92],"multiplicative":[93],"propose":[96],"C2Q-SFT,":[97],"classic-to-quantum":[99],"(C2Q)":[100],"protocol":[101],"for":[102,129],"supervised":[103],"fine-tuning":[104,135],"(SFT):":[105],"it":[106,126],"continues":[107],"train":[109],"given":[111],"transparently":[114],"replacing":[115],"standard":[117,146],"operations":[118,164],"with":[119,148],"ours.":[120],"shown":[122],"our":[124],"experiments,":[125],"shows":[127],"advantages":[128],"both":[130],"training":[131,177],"from":[132],"scratch":[133],"on":[136,151,160,171],"downstream":[137],"tasks":[138],"across":[139],"scales":[140],"LLMs.":[142],"C2Q-SFT":[143],"consistently":[144],"outperforms":[145],"SFT,":[147],"relative":[149],"improvements":[150],"MMLU":[152],"(+0.56%)":[153],"GSM8k":[155],"(+0.61%).":[156],"It":[157],"sheds":[158],"light":[159],"innovation":[162],"networks,":[166],"orthogonal":[167],"efforts":[170],"new":[172],"architecture,":[173],"position":[174],"encoding,":[175],"algorithms,":[178],"etc.":[179],"See":[180],"project":[181],"page":[182],"at:":[183],"https://github.com/Thinklab-SJTU/LLM/QI-LLM.":[184]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
