{"id":"https://openalex.org/W7134270385","doi":"https://doi.org/10.48550/arxiv.2603.05727","title":"Structured Multidimensional Representation Learning for Large Language Models","display_name":"Structured Multidimensional Representation Learning for Large Language Models","publication_year":2026,"publication_date":"2026-03-05","ids":{"openalex":"https://openalex.org/W7134270385","doi":"https://doi.org/10.48550/arxiv.2603.05727"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2603.05727","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5089029549","display_name":"A. El Ichi","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Ichi, Alaa El","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128398613","display_name":"Khalide Jbilou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jbilou, Khalide","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115924309","display_name":"Mohamed El Guide","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guide, Mohamed El","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5128495993","display_name":"Franck Dufrenois","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dufrenois, Franck","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5089029549"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.36629998683929443,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.36629998683929443,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.12950000166893005,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.07440000027418137,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.7121000289916992},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.6322000026702881},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5002999901771545},{"id":"https://openalex.org/keywords/discrete-cosine-transform","display_name":"Discrete cosine transform","score":0.41179999709129333},{"id":"https://openalex.org/keywords/redundancy","display_name":"Redundancy (engineering)","score":0.38600000739097595},{"id":"https://openalex.org/keywords/scaling","display_name":"Scaling","score":0.38179999589920044},{"id":"https://openalex.org/keywords/trigonometric-functions","display_name":"Trigonometric functions","score":0.37720000743865967},{"id":"https://openalex.org/keywords/matrix-decomposition","display_name":"Matrix decomposition","score":0.3686000108718872}],"concepts":[{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.7121000289916992},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.6322000026702881},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5034000277519226},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5002999901771545},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.49000000953674316},{"id":"https://openalex.org/C2221639","wikidata":"https://www.wikidata.org/wiki/Q2877","display_name":"Discrete cosine transform","level":3,"score":0.41179999709129333},{"id":"https://openalex.org/C152124472","wikidata":"https://www.wikidata.org/wiki/Q1204361","display_name":"Redundancy (engineering)","level":2,"score":0.38600000739097595},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.38179999589920044},{"id":"https://openalex.org/C178009071","wikidata":"https://www.wikidata.org/wiki/Q93344","display_name":"Trigonometric functions","level":2,"score":0.37720000743865967},{"id":"https://openalex.org/C42355184","wikidata":"https://www.wikidata.org/wiki/Q1361088","display_name":"Matrix decomposition","level":3,"score":0.3686000108718872},{"id":"https://openalex.org/C136886441","wikidata":"https://www.wikidata.org/wiki/Q926129","display_name":"Normalization (sociology)","level":2,"score":0.36419999599456787},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.33709999918937683},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.3305000066757202},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3158999979496002},{"id":"https://openalex.org/C187834632","wikidata":"https://www.wikidata.org/wiki/Q188804","display_name":"Factorization","level":2,"score":0.2906000018119812},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2815000116825104},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.2630000114440918},{"id":"https://openalex.org/C202615002","wikidata":"https://www.wikidata.org/wiki/Q783507","display_name":"Differentiable function","level":2,"score":0.25949999690055847},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.25859999656677246},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.257999986410141},{"id":"https://openalex.org/C39927690","wikidata":"https://www.wikidata.org/wiki/Q11197","display_name":"Logarithm","level":2,"score":0.25459998846054077},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.2540999948978424},{"id":"https://openalex.org/C91458471","wikidata":"https://www.wikidata.org/wiki/Q17096468","display_name":"Lapped transform","level":5,"score":0.2540000081062317}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2603.05727","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2603.05727","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.05727","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2603.05727","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Transformer":[0,74,88],"architectures":[1],"achieve":[2],"state-of-the-art":[3],"performance":[4,231],"across":[5],"a":[6,37,72,133,215,222],"wide":[7],"range":[8],"of":[9,41],"pattern":[10],"recognition":[11],"and":[12,26,60,63,119,144,172],"natural":[13],"language":[14],"processing":[15],"tasks,":[16],"but":[17],"their":[18],"scaling":[19,165],"is":[20,96],"accompanied":[21],"by":[22],"substantial":[23],"parameter":[24],"growth":[25],"redundancy":[27],"in":[28,66,122,219],"the":[29,42,47,67,78,93,139,152,176,195,202],"embedding":[30,43,128,160],"dimension.":[31],"In":[32],"this":[33],"work,":[34],"we":[35,70,213],"introduce":[36],"structured":[38],"spectral":[39,57,83,153],"factorization":[40],"space":[44],"based":[45],"on":[46,104,170,208],"L-product":[48],"for":[49,187,221],"third-order":[50],"tensors.":[51],"By":[52],"reshaping":[53],"token":[54],"representations":[55],"into":[56,80],"tensor":[58],"slices":[59],"performing":[61],"attention":[62],"feed-forward":[64],"operations":[65],"transform":[68],"domain,":[69],"obtain":[71],"Tensor":[73],"architecture":[75],"that":[76,92,166,175],"decomposes":[77],"encoder":[79,123,182,197,225],"p":[81,100],"independent":[82],"sub-transformers":[84],"while":[85,189],"preserving":[86],"standard":[87,203],"semantics.":[89],"We":[90],"prove":[91],"proposed":[94,177],"L-Transformer":[95],"spectrally":[97],"equivalent":[98],"to":[99,113,185,233],"parallel":[101],"Transformers":[102],"operating":[103],"reduceddimensional":[105],"embeddings,":[106],"which":[107],"yields":[108],"approximately":[109],"1/p":[110],"reduction":[111],"(up":[112,184],"lower-order":[114],"terms":[115],"such":[116],"as":[117],"biases":[118],"normalization":[120],"parameters)":[121],"parameters":[124,183],"under":[125,205],"fixed":[126],"total":[127],"size.":[129],"When":[130],"instantiated":[131],"with":[132,146],"real-valued":[134],"Discrete":[135],"Cosine":[136],"Transform":[137],"(DCT),":[138],"method":[140],"remains":[141],"fully":[142],"differentiable":[143],"compatible":[145],"existing":[147],"training":[148],"pipelines.":[149],"Beyond":[150],"compression,":[151,206],"decomposition":[154],"introduces":[155],"an":[156],"inductive":[157],"bias":[158],"over":[159],"frequencies,":[161],"enabling":[162],"slice-dependent":[163],"frequency":[164],"improves":[167,200],"generalization.":[168],"Experiments":[169],"IMDB":[171],"AG~News":[173,209],"show":[174],"model":[178],"can":[179],"substantially":[180],"reduce":[181],"75\\%":[186],"p=4)":[188],"maintaining":[190],"competitive":[191],"accuracy.":[192],"On":[193],"IMDB,":[194],"tensorized":[196],"matches":[198],"or":[199],"upon":[201],"baseline":[204],"whereas":[207],"at":[210,227],"moderate":[211],"width":[212,229],"observe":[214],"small":[216],"accuracy":[217],"decrease":[218],"exchange":[220],"4":[223],"times":[224],"reduction;":[226],"BERT-base":[228],"(d=768),":[230],"returns":[232],"parity.":[234]},"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2026-03-10T00:00:00"}
