{"id":"https://openalex.org/W7130683970","doi":"https://doi.org/10.1109/tc.2026.3666458","title":"TRACE: Unlocking Effective CXL Bandwidth via Lossless Compression and Precision Scaling","display_name":"TRACE: Unlocking Effective CXL Bandwidth via Lossless Compression and Precision Scaling","publication_year":2026,"publication_date":"2026-02-20","ids":{"openalex":"https://openalex.org/W7130683970","doi":"https://doi.org/10.1109/tc.2026.3666458"},"language":null,"primary_location":{"id":"doi:10.1109/tc.2026.3666458","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tc.2026.3666458","pdf_url":null,"source":{"id":"https://openalex.org/S157670870","display_name":"IEEE Transactions on Computers","issn_l":"0018-9340","issn":["0018-9340","1557-9956","2326-3814"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computers","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Rui Xie","orcid":"https://orcid.org/0000-0003-3177-5071"},"institutions":[{"id":"https://openalex.org/I165799507","display_name":"Rensselaer Polytechnic Institute","ror":"https://ror.org/01rtyzb94","country_code":"US","type":"education","lineage":["https://openalex.org/I165799507"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Rui Xie","raw_affiliation_strings":["Rensselaer Polytechnic Institute, Troy, NY, USA"],"raw_orcid":"https://orcid.org/0000-0003-3177-5071","affiliations":[{"raw_affiliation_string":"Rensselaer Polytechnic Institute, Troy, NY, USA","institution_ids":["https://openalex.org/I165799507"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Asad Ul Haq","orcid":"https://orcid.org/0009-0003-7975-0102"},"institutions":[{"id":"https://openalex.org/I165799507","display_name":"Rensselaer Polytechnic Institute","ror":"https://ror.org/01rtyzb94","country_code":"US","type":"education","lineage":["https://openalex.org/I165799507"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Asad Ul Haq","raw_affiliation_strings":["Rensselaer Polytechnic Institute, Troy, NY, USA"],"raw_orcid":"https://orcid.org/0009-0003-7975-0102","affiliations":[{"raw_affiliation_string":"Rensselaer Polytechnic Institute, Troy, NY, USA","institution_ids":["https://openalex.org/I165799507"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Yunhua Fang","orcid":"https://orcid.org/0009-0009-4718-8825"},"institutions":[{"id":"https://openalex.org/I165799507","display_name":"Rensselaer Polytechnic Institute","ror":"https://ror.org/01rtyzb94","country_code":"US","type":"education","lineage":["https://openalex.org/I165799507"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yunhua Fang","raw_affiliation_strings":["Rensselaer Polytechnic Institute, Troy, NY, USA"],"raw_orcid":"https://orcid.org/0009-0009-4718-8825","affiliations":[{"raw_affiliation_string":"Rensselaer Polytechnic Institute, Troy, NY, USA","institution_ids":["https://openalex.org/I165799507"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Linsen Ma","orcid":"https://orcid.org/0009-0000-8535-7911"},"institutions":[{"id":"https://openalex.org/I165799507","display_name":"Rensselaer Polytechnic Institute","ror":"https://ror.org/01rtyzb94","country_code":"US","type":"education","lineage":["https://openalex.org/I165799507"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Linsen Ma","raw_affiliation_strings":["Rensselaer Polytechnic Institute, Troy, NY, USA"],"raw_orcid":"https://orcid.org/0009-0000-8535-7911","affiliations":[{"raw_affiliation_string":"Rensselaer Polytechnic Institute, Troy, NY, USA","institution_ids":["https://openalex.org/I165799507"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Zirak Burzin Engineer","orcid":"https://orcid.org/0009-0001-7058-9506"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zirak Burzin Engineer","raw_affiliation_strings":["Wiseburn Da Vinci Science, El Segundo, CA, USA"],"raw_orcid":"https://orcid.org/0009-0001-7058-9506","affiliations":[{"raw_affiliation_string":"Wiseburn Da Vinci Science, El Segundo, CA, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":null,"display_name":"Liu Liu","orcid":"https://orcid.org/0000-0003-0792-8146"},"institutions":[{"id":"https://openalex.org/I165799507","display_name":"Rensselaer Polytechnic Institute","ror":"https://ror.org/01rtyzb94","country_code":"US","type":"education","lineage":["https://openalex.org/I165799507"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Liu Liu","raw_affiliation_strings":["Rensselaer Polytechnic Institute, Troy, NY, USA"],"raw_orcid":"https://orcid.org/0000-0003-0792-8146","affiliations":[{"raw_affiliation_string":"Rensselaer Polytechnic Institute, Troy, NY, USA","institution_ids":["https://openalex.org/I165799507"]}]},{"author_position":"last","author":{"id":null,"display_name":"Tong Zhang","orcid":"https://orcid.org/0009-0009-8005-0043"},"institutions":[{"id":"https://openalex.org/I165799507","display_name":"Rensselaer Polytechnic Institute","ror":"https://ror.org/01rtyzb94","country_code":"US","type":"education","lineage":["https://openalex.org/I165799507"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tong Zhang","raw_affiliation_strings":["Rensselaer Polytechnic Institute, Troy, NY, USA"],"raw_orcid":"https://orcid.org/0009-0009-8005-0043","affiliations":[{"raw_affiliation_string":"Rensselaer Polytechnic Institute, Troy, NY, USA","institution_ids":["https://openalex.org/I165799507"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I165799507"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.25890855,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"75","issue":"4","first_page":"1349","last_page":"1362"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.5644000172615051,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.5644000172615051,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.2757999897003174,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12326","display_name":"Network Packet Processing and Optimization","score":0.02500000037252903,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/lossless-compression","display_name":"Lossless compression","score":0.5748000144958496},{"id":"https://openalex.org/keywords/bandwidth","display_name":"Bandwidth (computing)","score":0.49880000948905945},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.4767000079154968},{"id":"https://openalex.org/keywords/codec","display_name":"Codec","score":0.476500004529953},{"id":"https://openalex.org/keywords/dram","display_name":"Dram","score":0.4424999952316284},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.4083000123500824},{"id":"https://openalex.org/keywords/memory-bandwidth","display_name":"Memory bandwidth","score":0.4077000021934509},{"id":"https://openalex.org/keywords/throughput","display_name":"Throughput","score":0.40720000863075256},{"id":"https://openalex.org/keywords/memory-footprint","display_name":"Memory footprint","score":0.38260000944137573}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8302000164985657},{"id":"https://openalex.org/C81081738","wikidata":"https://www.wikidata.org/wiki/Q55542","display_name":"Lossless compression","level":3,"score":0.5748000144958496},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.49880000948905945},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.4767000079154968},{"id":"https://openalex.org/C161765866","wikidata":"https://www.wikidata.org/wiki/Q184748","display_name":"Codec","level":2,"score":0.476500004529953},{"id":"https://openalex.org/C7366592","wikidata":"https://www.wikidata.org/wiki/Q1255620","display_name":"Dram","level":2,"score":0.4424999952316284},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.41350001096725464},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.4083000123500824},{"id":"https://openalex.org/C188045654","wikidata":"https://www.wikidata.org/wiki/Q17148339","display_name":"Memory bandwidth","level":2,"score":0.4077000021934509},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.40720000863075256},{"id":"https://openalex.org/C74912251","wikidata":"https://www.wikidata.org/wiki/Q6815727","display_name":"Memory footprint","level":2,"score":0.38260000944137573},{"id":"https://openalex.org/C2780165032","wikidata":"https://www.wikidata.org/wiki/Q16869822","display_name":"Energy consumption","level":2,"score":0.35670000314712524},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.3555999994277954},{"id":"https://openalex.org/C19275194","wikidata":"https://www.wikidata.org/wiki/Q222903","display_name":"Multiplexing","level":2,"score":0.34470000863075256},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.32850000262260437},{"id":"https://openalex.org/C2742236","wikidata":"https://www.wikidata.org/wiki/Q924713","display_name":"Efficient energy use","level":2,"score":0.32690000534057617},{"id":"https://openalex.org/C132943942","wikidata":"https://www.wikidata.org/wiki/Q2562511","display_name":"Footprint","level":2,"score":0.32679998874664307},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.32339999079704285},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.3212999999523163},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.31869998574256897},{"id":"https://openalex.org/C165005293","wikidata":"https://www.wikidata.org/wiki/Q1074500","display_name":"Chip","level":2,"score":0.3131999969482422},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.301800012588501},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.30090001225471497},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.29739999771118164},{"id":"https://openalex.org/C100800780","wikidata":"https://www.wikidata.org/wiki/Q1175867","display_name":"Memory controller","level":3,"score":0.287200003862381},{"id":"https://openalex.org/C118021083","wikidata":"https://www.wikidata.org/wiki/Q610398","display_name":"System on a chip","level":2,"score":0.2802000045776367},{"id":"https://openalex.org/C75291252","wikidata":"https://www.wikidata.org/wiki/Q1315756","display_name":"TRACE (psycholinguistics)","level":2,"score":0.27790001034736633},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.2687999904155731},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.26100000739097595},{"id":"https://openalex.org/C203479927","wikidata":"https://www.wikidata.org/wiki/Q5165939","display_name":"Controller (irrigation)","level":2,"score":0.2549999952316284}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tc.2026.3666458","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tc.2026.3666458","pdf_url":null,"source":{"id":"https://openalex.org/S157670870","display_name":"IEEE Transactions on Computers","issn_l":"0018-9340","issn":["0018-9340","1557-9956","2326-3814"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computers","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Affordable and clean energy","score":0.4877888262271881,"id":"https://metadata.un.org/sdg/7"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":16,"referenced_works":["https://openalex.org/W1509841269","https://openalex.org/W2053425642","https://openalex.org/W2346205343","https://openalex.org/W2430301697","https://openalex.org/W2516109628","https://openalex.org/W2963015836","https://openalex.org/W2998617917","https://openalex.org/W3006586535","https://openalex.org/W3163073193","https://openalex.org/W3194676777","https://openalex.org/W4389519291","https://openalex.org/W4399677840","https://openalex.org/W4402130049","https://openalex.org/W4404782031","https://openalex.org/W4405348616","https://openalex.org/W4406650295"],"related_works":[],"abstract_inverted_index":{"LLM":[0,70],"inference":[1],"is":[2,72],"increasingly":[3],"limited":[4],"by":[5,134,148,154],"memory":[6,21],"bandwidth,":[7],"and":[8,27,32,87,109,150,223,230],"the":[9,16,47,91,97,137],"bottleneck":[10],"worsens":[11],"at":[12,162,176,227],"long":[13],"context":[14],"as":[15],"KV":[17,78,152,159,169],"cache":[18],"grows.":[19],"CXL":[20,48,51,209],"adds":[22,218],"capacity":[23],"to":[24,46,55,67,171,181,188,207],"offload":[25],"weights":[26],"KV,":[28],"but":[29,95],"its":[30],"link":[31],"device-side":[33],"DDR":[34],"bandwidth":[35],"are":[36,53],"far":[37],"below":[38],"HBM,":[39],"so":[40],"decoding":[41],"stalls":[42],"once":[43,168],"traffic":[44],"shifts":[45],"tier.":[49],"Many":[50],"controllers":[52],"starting":[54],"add":[56],"generic":[57,212],"<italic":[58],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[59],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">lossless</i>":[60],"compression,":[61,115,215],"yet":[62],"applying":[63],"commodity":[64,124],"codecs":[65,125],"directly":[66],"standard":[68],"word-major":[69],"tensors":[71,102],"largely":[73],"ineffective,":[74],"especially":[75],"for":[76,85],"token-major":[77],"streams.":[79],"We":[80],"propose":[81],"TRACE":[82,143,216],"(Traffic-Reduced":[83],"Architecture":[84],"Compression":[86],"Elasticity),":[88],"which":[89],"preserves":[90],"unmodified":[92],"CXL.mem":[93],"interface":[94],"changes":[96],"device-internal":[98],"representation.":[99],"It":[100],"stores":[101],"in":[103],"a":[104,111,208],"channel-major,":[105],"disaggregated":[106],"bit-plane":[107],"layout,":[108],"applies":[110],"KV-specific":[112],"transform":[113],"before":[114],"converting":[116],"mixed-field":[117],"words":[118],"into":[119],"low-entropy":[120],"plane":[121],"streams":[122],"that":[123],"can":[126],"compress.":[127],"The":[128],"same":[129],"substrate":[130],"enables":[131],"precision-proportional":[132],"fetch":[133],"reading":[135],"only":[136,217],"required":[138],"bit-planes.":[139],"Across":[140],"public":[141],"LLMs,":[142],"reduces":[144],"BF16":[145,151],"weight":[146],"footprint":[147,153],"25.2%":[149],"46.9%":[155],"losslessly,":[156],"with":[157,211],"per-layer":[158],"ratios":[160],"peaking":[161],"2.69\u00d7.":[163],"In":[164],"tracedriven":[165],"system":[166],"modeling,":[167],"spills":[170],"CXL,":[172],"GPT-OSS-120B-MXFP4":[173],"improves":[174],"throughput":[175],"128k":[177],"tokens":[178],"from":[179],"16.28":[180],"68.99":[182],"tok/s":[183],"(4.24\u00d7).":[184],"DRAMSim3":[185],"shows":[186],"up":[187],"40.3%":[189],"lower":[190],"DRAM":[191],"access":[192],"energy":[193],"under":[194],"plane-aligned":[195],"fetch.":[196],"A":[197],"7nm":[198],"SystemVerilog":[199],"implementation":[200],"sustains":[201],"256":[202],"GB/s":[203],"device":[204],"bandwidth.":[205],"Relative":[206],"controller":[210],"inline":[213],"lossless":[214],"7.2%":[219],"area,":[220],"4.7%":[221],"power,":[222],"6.0%":[224],"load-to-use":[225],"latency":[226],"2":[228],"GHz":[229],"0.7V.":[231]},"counts_by_year":[],"updated_date":"2026-03-20T20:47:17.329874","created_date":"2026-02-04T00:00:00"}
