{"id":"https://openalex.org/W7155521191","doi":"https://doi.org/10.48550/arxiv.2604.21724","title":"Beyond N-gram: Data-Aware X-GRAM Extraction for Efficient Embedding Parameter Scaling","display_name":"Beyond N-gram: Data-Aware X-GRAM Extraction for Efficient Embedding Parameter Scaling","publication_year":2026,"publication_date":"2026-04-23","ids":{"openalex":"https://openalex.org/W7155521191","doi":"https://doi.org/10.48550/arxiv.2604.21724"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.21724","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.21724","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.21724","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5134525819","display_name":"Yilong Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Chen, Yilong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134502239","display_name":"Yanxi Xie","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xie, 
Yanxi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134504264","display_name":"Zitian Gao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gao, Zitian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134478187","display_name":"He Xin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xin, He","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134474260","display_name":"Yihao Xiao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiao, Yihao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134529024","display_name":"Renbiao Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Renbiao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134515216","display_name":"Haoming Luo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Luo, Haoming","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114053168","display_name":"Yifan Luo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Luo, Yifan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133220164","display_name":"Zhengmao Ye","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ye, 
Zhengmao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134512662","display_name":"Tingwen Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Tingwen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134515974","display_name":"Xin Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Xin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134515701","display_name":"Ran Tao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tao, Ran","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134537264","display_name":"Bryan Dai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dai, Bryan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":["https://openalex.org/A5134525819"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.15760000050067902,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer 
Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.15760000050067902,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.14219999313354492,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.07769999653100967,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6276000142097473},{"id":"https://openalex.org/keywords/scaling","display_name":"Scaling","score":0.5411999821662903},{"id":"https://openalex.org/keywords/alias","display_name":"Alias","score":0.4828000068664551},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.47999998927116394},{"id":"https://openalex.org/keywords/spurious-relationship","display_name":"Spurious relationship","score":0.44519999623298645},{"id":"https://openalex.org/keywords/hash-function","display_name":"Hash 
function","score":0.37540000677108765},{"id":"https://openalex.org/keywords/aggregate","display_name":"Aggregate (composite)","score":0.37139999866485596},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.3587000072002411},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.3488999903202057}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7839999794960022},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6276000142097473},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.6194999814033508},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.5411999821662903},{"id":"https://openalex.org/C46681722","wikidata":"https://www.wikidata.org/wiki/Q4725589","display_name":"Alias","level":2,"score":0.4828000068664551},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.47999998927116394},{"id":"https://openalex.org/C97256817","wikidata":"https://www.wikidata.org/wiki/Q1462316","display_name":"Spurious relationship","level":2,"score":0.44519999623298645},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.40049999952316284},{"id":"https://openalex.org/C99138194","wikidata":"https://www.wikidata.org/wiki/Q183427","display_name":"Hash function","level":2,"score":0.37540000677108765},{"id":"https://openalex.org/C4679612","wikidata":"https://www.wikidata.org/wiki/Q866298","display_name":"Aggregate 
(composite)","level":2,"score":0.37139999866485596},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.3587000072002411},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.3488999903202057},{"id":"https://openalex.org/C134835016","wikidata":"https://www.wikidata.org/wiki/Q690265","display_name":"Lookup table","level":2,"score":0.3314000070095062},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3133000135421753},{"id":"https://openalex.org/C205606062","wikidata":"https://www.wikidata.org/wiki/Q5249645","display_name":"Decoupling (probability)","level":2,"score":0.3043000102043152},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.3037000000476837},{"id":"https://openalex.org/C74912251","wikidata":"https://www.wikidata.org/wiki/Q6815727","display_name":"Memory footprint","level":2,"score":0.2962999939918518},{"id":"https://openalex.org/C190290938","wikidata":"https://www.wikidata.org/wiki/Q387015","display_name":"Trie","level":3,"score":0.28949999809265137},{"id":"https://openalex.org/C117251300","wikidata":"https://www.wikidata.org/wiki/Q1849855","display_name":"Parametric statistics","level":2,"score":0.2809999883174896},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.26919999718666077},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.26440000534057617},{"id":"https://openalex.org/C138777275","wikidata":"https://www.wikidata.org/wiki/Q6884054","display_name":"Mixing 
(physics)","level":2,"score":0.2639000117778778},{"id":"https://openalex.org/C176649486","wikidata":"https://www.wikidata.org/wiki/Q2308807","display_name":"Memory management","level":3,"score":0.25609999895095825},{"id":"https://openalex.org/C67388219","wikidata":"https://www.wikidata.org/wiki/Q207440","display_name":"Hash table","level":3,"score":0.2549000084400177},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.2538999915122986}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.21724","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.21724","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.21724","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.21724","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell 
University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large":[0],"token-indexed":[1],"lookup":[2],"tables":[3,155],"provide":[4],"a":[5,52,110,172],"compute-decoupled":[6],"scaling":[7,112],"path,":[8],"but":[9],"their":[10],"practical":[11,175],"gains":[12],"are":[13,88],"often":[14],"limited":[15],"by":[16,134,161],"poor":[17],"parameter":[18],"efficiency":[19],"and":[20,39,61,72,94,125,144,174],"rapid":[21],"memory":[22,103,168],"growth.":[23],"We":[24],"attribute":[25],"these":[26],"limitations":[27],"to":[28,64,80],"Zipfian":[29],"under-training":[30],"of":[31],"the":[32,66,123,141,157],"long":[33],"tail,":[34],"heterogeneous":[35],"demand":[36],"across":[37],"layers,":[38],"\"slot":[40],"collapse\"":[41],"that":[42,114,129],"produces":[43],"redundant":[44],"embeddings.":[45],"To":[46],"address":[47],"this,":[48],"we":[49],"propose":[50],"X-GRAM,":[51],"frequency-aware":[53],"dynamic":[54,105],"token-injection":[55],"framework.":[56],"X-GRAM":[57,130,170],"employs":[58],"hybrid":[59],"hashing":[60],"alias":[62],"mixing":[63],"compress":[65],"tail":[67],"while":[68,151],"preserving":[69],"head":[70],"capacity,":[71],"refines":[73],"retrieved":[74],"vectors":[75],"via":[76],"normalized":[77],"SwiGLU":[78],"ShortConv":[79],"extract":[81],"diverse":[82],"local":[83],"n-gram":[84],"features.":[85],"These":[86],"signals":[87],"integrated":[89],"into":[90],"attention":[91],"value":[92],"streams":[93],"inter-layer":[95],"residuals":[96],"using":[97,152],"depth-aware":[98],"gating,":[99],"effectively":[100
],"aligning":[101],"static":[102],"with":[104],"context.":[106],"This":[107],"design":[108],"introduces":[109],"memory-centric":[111],"axis":[113],"decouples":[115],"model":[116],"capacity":[117,163],"from":[118,164],"FLOPs.":[119],"Extensive":[120],"evaluations":[121],"at":[122],"0.73B":[124],"1.15B":[126],"scales":[127],"show":[128],"improves":[131],"average":[132],"accuracy":[133],"as":[135,137],"much":[136],"4.4":[138],"points":[139,146],"over":[140,147],"vanilla":[142],"backbone":[143],"3.2":[145],"strong":[148],"retrieval":[149],"baselines,":[150],"substantially":[153],"smaller":[154],"in":[156,183],"50%":[158],"configuration.":[159],"Overall,":[160],"decoupling":[162],"compute":[165],"through":[166],"efficient":[167],"management,":[169],"offers":[171],"scalable":[173],"paradigm":[176],"for":[177],"future":[178],"memory-augmented":[179],"architectures.":[180],"Code":[181],"aviliable":[182],"https://github.com/Longyichen/X-gram.":[184]},"counts_by_year":[],"updated_date":"2026-04-28T06:04:28.489925","created_date":"2026-04-25T00:00:00"}
