{"id":"https://openalex.org/W4416252223","doi":"https://doi.org/10.1109/ijcnn64981.2025.11228585","title":"TensorLLM: Tensorising Multi-Head Attention for Enhanced Reasoning and Compression in LLMs","display_name":"TensorLLM: Tensorising Multi-Head Attention for Enhanced Reasoning and Compression in LLMs","publication_year":2025,"publication_date":"2025-06-30","ids":{"openalex":"https://openalex.org/W4416252223","doi":"https://doi.org/10.1109/ijcnn64981.2025.11228585"},"language":null,"primary_location":{"id":"doi:10.1109/ijcnn64981.2025.11228585","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn64981.2025.11228585","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5073038540","display_name":"Yixin Gu","orcid":"https://orcid.org/0000-0002-0454-8431"},"institutions":[{"id":"https://openalex.org/I47508984","display_name":"Imperial College London","ror":"https://ror.org/041kmwe10","country_code":"GB","type":"education","lineage":["https://openalex.org/I47508984"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Yuxuan Gu","raw_affiliation_strings":["Imperial College,Department of Electrical and Electronic Engineering,London,United Kingdom"],"affiliations":[{"raw_affiliation_string":"Imperial College,Department of Electrical and Electronic Engineering,London,United Kingdom","institution_ids":["https://openalex.org/I47508984"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065141875","display_name":"Wuyang Zhou","orcid":"https://orcid.org/0000-0003-2229-2852"},"institutions":[{"id":"https://openalex.org/I47508984","display_name":"Imperial College London","ror":"https://ror.org/041kmwe10","country_code":"GB","type":"education","lineage":["https://openalex.org/I47508984"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Wuyang Zhou","raw_affiliation_strings":["Imperial College,Department of Electrical and Electronic Engineering,London,United Kingdom"],"affiliations":[{"raw_affiliation_string":"Imperial College,Department of Electrical and Electronic Engineering,London,United Kingdom","institution_ids":["https://openalex.org/I47508984"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5094208275","display_name":"Giorgos Iacovides","orcid":"https://orcid.org/0009-0007-5733-8992"},"institutions":[{"id":"https://openalex.org/I47508984","display_name":"Imperial College London","ror":"https://ror.org/041kmwe10","country_code":"GB","type":"education","lineage":["https://openalex.org/I47508984"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Giorgos Iacovides","raw_affiliation_strings":["Imperial College,Department of Electrical and Electronic Engineering,London,United Kingdom"],"affiliations":[{"raw_affiliation_string":"Imperial College,Department of Electrical and Electronic Engineering,London,United Kingdom","institution_ids":["https://openalex.org/I47508984"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5103001848","display_name":"Danilo P. Mandic","orcid":"https://orcid.org/0000-0001-8432-3963"},"institutions":[{"id":"https://openalex.org/I47508984","display_name":"Imperial College London","ror":"https://ror.org/041kmwe10","country_code":"GB","type":"education","lineage":["https://openalex.org/I47508984"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Danilo Mandic","raw_affiliation_strings":["Imperial College,Department of Electrical and Electronic Engineering,London,United Kingdom"],"affiliations":[{"raw_affiliation_string":"Imperial College,Department of Electrical and Electronic Engineering,London,United Kingdom","institution_ids":["https://openalex.org/I47508984"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5073038540"],"corresponding_institution_ids":["https://openalex.org/I47508984"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.1951792,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.28189998865127563,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.28189998865127563,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.09679999947547913,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.0868000015616417,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.619700014591217},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.4512999951839447},{"id":"https://openalex.org/keywords/compression","display_name":"Compression (physics)","score":0.45010000467300415},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.4189000129699707},{"id":"https://openalex.org/keywords/data-compression","display_name":"Data compression","score":0.4133000075817108},{"id":"https://openalex.org/keywords/case-based-reasoning","display_name":"Case-based reasoning","score":0.398499995470047}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6758000254631042},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.619700014591217},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5195000171661377},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.4512999951839447},{"id":"https://openalex.org/C180016635","wikidata":"https://www.wikidata.org/wiki/Q2712821","display_name":"Compression (physics)","level":2,"score":0.45010000467300415},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.4189000129699707},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.4133000075817108},{"id":"https://openalex.org/C20162079","wikidata":"https://www.wikidata.org/wiki/Q1151406","display_name":"Case-based reasoning","level":2,"score":0.398499995470047},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.3871000111103058},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3686999976634979},{"id":"https://openalex.org/C32834561","wikidata":"https://www.wikidata.org/wiki/Q660730","display_name":"Subspace topology","level":2,"score":0.33070001006126404},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.3296999931335449},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.25600001215934753}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn64981.2025.11228585","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn64981.2025.11228585","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W1840435438","https://openalex.org/W1963826206","https://openalex.org/W2013912476","https://openalex.org/W2018282388","https://openalex.org/W2024165284","https://openalex.org/W2033368244","https://openalex.org/W2113055885","https://openalex.org/W2119412403","https://openalex.org/W2889787757","https://openalex.org/W2909212904","https://openalex.org/W2943845043","https://openalex.org/W2950784811","https://openalex.org/W2963542740","https://openalex.org/W2963961878","https://openalex.org/W2970419734","https://openalex.org/W2972324944","https://openalex.org/W2972498556","https://openalex.org/W3136363192","https://openalex.org/W4385245566","https://openalex.org/W4402715946","https://openalex.org/W4411306089"],"related_works":[],"abstract_inverted_index":{"The":[0],"reasoning":[1,109,169],"abilities":[2],"of":[3,27,45,83,96,111,128],"Large":[4],"Language":[5],"Models":[6],"(LLMs)":[7],"can":[8,32,154],"be":[9,155],"improved":[10],"by":[11,87],"structurally":[12],"denoising":[13,22,80,161],"their":[14],"weights,":[15,86,137],"yet":[16],"existing":[17,159],"techniques":[18,162],"primarily":[19],"focus":[20],"on":[21],"the":[23,28,36,43,72,84,94,97,108,135,151],"feed-forward":[24],"network":[25],"(FFN)":[26],"transformer":[29,46],"block,":[30,40],"and":[31,71,81,117,121],"not":[33],"efficiently":[34],"utilise":[35],"Multi-head":[37],"Attention":[38],"(MHA)":[39],"which":[41],"is":[42],"core":[44],"architectures.":[47],"To":[48],"address":[49],"this":[50,104],"issue,":[51],"we":[52,148],"propose":[53],"a":[54,67,89],"novel":[55],"intuitive":[56],"framework":[57],"that,":[58],"at":[59],"its":[60],"very":[61],"core,":[62],"performs":[63],"MHA":[64,85,136],"compression":[65,82,126],"through":[66],"multi-head":[68],"tensorisation":[69],"process":[70],"Tucker":[73],"decomposition.":[74],"This":[75],"enables":[76],"both":[77,119],"higher-dimensional":[78,91],"structured":[79],"enforcing":[88],"shared":[90],"subspace":[92],"across":[93,113],"weights":[95],"multiple":[98,114],"attention":[99],"heads.":[100],"We":[101],"demonstrate":[102],"that":[103,150],"approach":[105],"consistently":[106],"enhances":[107],"capabilities":[110],"LLMs":[112],"benchmark":[115],"datasets,":[116],"for":[118],"encoder-only":[120],"decoder-only":[122],"architectures,":[123],"while":[124],"achieving":[125],"rates":[127],"up":[129],"to":[130,163],"~":[131],"250":[132],"times":[133],"in":[134,167],"all":[138],"without":[139],"requiring":[140],"any":[141],"additional":[142],"data,":[143],"training,":[144],"or":[145],"fine-tuning.":[146],"Furthermore,":[147],"show":[149],"proposed":[152],"method":[153],"seamlessly":[156],"combined":[157],"with":[158],"FFN-only-based":[160],"achieve":[164],"further":[165],"improvements":[166],"LLM":[168],"performance.<sup":[170],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[171],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">1</sup>":[172]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-11-14T00:00:00"}
