{"id":"https://openalex.org/W4417529476","doi":"https://doi.org/10.48550/arxiv.2512.15742","title":"SHARe-KAN: Post-Training Vector Quantization for Cache-Resident KAN Inference","display_name":"SHARe-KAN: Post-Training Vector Quantization for Cache-Resident KAN Inference","publication_year":2025,"publication_date":"2025-12-10","ids":{"openalex":"https://openalex.org/W4417529476","doi":"https://doi.org/10.48550/arxiv.2512.15742"},"language":null,"primary_location":{"id":"pmh:oai:arXiv.org:2512.15742","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2512.15742","pdf_url":"https://arxiv.org/pdf/2512.15742","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2512.15742","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5025902664","display_name":"Jeff P. Smith","orcid":"https://orcid.org/0000-0003-3264-6095"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Smith, Jeff","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5025902664"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.4832000136375427,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.4832000136375427,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.226500004529953,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.07429999858140945,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/memory-bandwidth","display_name":"Memory bandwidth","score":0.5048999786376953},{"id":"https://openalex.org/keywords/dram","display_name":"Dram","score":0.4934999942779541},{"id":"https://openalex.org/keywords/redundancy","display_name":"Redundancy (engineering)","score":0.47209998965263367},{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.46639999747276306},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.42809998989105225},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.3928000032901764},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.37959998846054077},{"id":"https://openalex.org/keywords/vector-quantization","display_name":"Vector quantization","score":0.3612000048160553},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.35199999809265137},{"id":"https://openalex.org/keywords/solver","display_name":"Solver","score":0.3409000039100647}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7333999872207642},{"id":"https://openalex.org/C188045654","wikidata":"https://www.wikidata.org/wiki/Q17148339","display_name":"Memory bandwidth","level":2,"score":0.5048999786376953},{"id":"https://openalex.org/C7366592","wikidata":"https://www.wikidata.org/wiki/Q1255620","display_name":"Dram","level":2,"score":0.4934999942779541},{"id":"https://openalex.org/C152124472","wikidata":"https://www.wikidata.org/wiki/Q1204361","display_name":"Redundancy (engineering)","level":2,"score":0.47209998965263367},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.46639999747276306},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4496000111103058},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.42809998989105225},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.3928000032901764},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.37959998846054077},{"id":"https://openalex.org/C199833920","wikidata":"https://www.wikidata.org/wiki/Q612536","display_name":"Vector quantization","level":2,"score":0.3612000048160553},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.3540000021457672},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.35199999809265137},{"id":"https://openalex.org/C2778770139","wikidata":"https://www.wikidata.org/wiki/Q1966904","display_name":"Solver","level":2,"score":0.3409000039100647},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.3377000093460083},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3375999927520752},{"id":"https://openalex.org/C75608658","wikidata":"https://www.wikidata.org/wiki/Q44395","display_name":"Pascal (unit)","level":2,"score":0.3368000090122223},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.31040000915527344},{"id":"https://openalex.org/C12362212","wikidata":"https://www.wikidata.org/wiki/Q728435","display_name":"Linear subspace","level":2,"score":0.3001999855041504},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.298799991607666},{"id":"https://openalex.org/C74912251","wikidata":"https://www.wikidata.org/wiki/Q6815727","display_name":"Memory footprint","level":2,"score":0.29179999232292175},{"id":"https://openalex.org/C509933004","wikidata":"https://www.wikidata.org/wiki/Q194163","display_name":"Broadband","level":2,"score":0.28679999709129333},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.2854999899864197},{"id":"https://openalex.org/C5917680","wikidata":"https://www.wikidata.org/wiki/Q2621825","display_name":"Basis function","level":2,"score":0.2766000032424927},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.2752000093460083},{"id":"https://openalex.org/C2778100165","wikidata":"https://www.wikidata.org/wiki/Q1589327","display_name":"Memory hierarchy","level":3,"score":0.2676999866962433},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.26600000262260437},{"id":"https://openalex.org/C140745168","wikidata":"https://www.wikidata.org/wiki/Q1210082","display_name":"Tree traversal","level":2,"score":0.2653000056743622},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.2646999955177307},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.26440000534057617},{"id":"https://openalex.org/C205711294","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Rendering (computer graphics)","level":2,"score":0.263700008392334},{"id":"https://openalex.org/C187191949","wikidata":"https://www.wikidata.org/wiki/Q1138496","display_name":"Profiling (computer programming)","level":2,"score":0.2623000144958496},{"id":"https://openalex.org/C55439883","wikidata":"https://www.wikidata.org/wiki/Q360812","display_name":"Correctness","level":2,"score":0.2574999928474426},{"id":"https://openalex.org/C43364308","wikidata":"https://www.wikidata.org/wiki/Q8799","display_name":"Byte","level":2,"score":0.2547999918460846},{"id":"https://openalex.org/C180623205","wikidata":"https://www.wikidata.org/wiki/Q1268589","display_name":"Outer product","level":3,"score":0.25290000438690186},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.25040000677108765},{"id":"https://openalex.org/C187590223","wikidata":"https://www.wikidata.org/wiki/Q527628","display_name":"Holography","level":2,"score":0.2502000033855438}],"mesh":[],"locations_count":3,"locations":[{"id":"pmh:oai:arXiv.org:2512.15742","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2512.15742","pdf_url":"https://arxiv.org/pdf/2512.15742","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:arXiv.org:2512.15742","is_oa":true,"landing_page_url":"https://arxiv.org/abs/2512.15742","pdf_url":"https://arxiv.org/pdf/2512.15742","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2512.15742","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2512.15742","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2512.15742","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2512.15742","pdf_url":"https://arxiv.org/pdf/2512.15742","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Pre-trained":[0],"Vision":[1],"Kolmogorov-Arnold":[2],"Networks":[3],"(KANs)":[4],"store":[5],"a":[6,23,30,65,73,77,98,120,192],"dense":[7],"B-spline":[8],"grid":[9],"on":[10,33,40],"every":[11],"edge,":[12],"inflating":[13],"prediction-head":[14,181],"parameter":[15],"counts":[16],"by":[17],"more":[18],"than":[19],"140X":[20],"relative":[21],"to":[22,135,160],"comparable":[24],"MLP":[25],"and":[26,48,155],"pushing":[27],"inference":[28],"into":[29,90],"memory-bound":[31],"regime":[32],"edge":[34,206],"accelerators.":[35],"Standard":[36],"magnitude":[37],"pruning":[38],"fails":[39],"these":[41],"pre-trained":[42],"models:":[43],"zero-shot":[44],"sparsity":[45],"collapses":[46],"accuracy,":[47],"restoring":[49],"it":[50],"requires":[51,189],"an":[52,83],"iterative":[53],"fine-tuning":[54],"loop":[55],"that":[56,68,86,195],"is":[57],"impractical":[58],"in":[59],"deployment":[60,199],"settings.":[61],"We":[62],"present":[63],"SHARe-KAN,":[64],"post-training":[66],"compiler":[67],"compresses":[69],"spline":[70],"coefficients":[71],"via":[72],"Gain-Shape-Bias":[74],"decomposition":[75],"with":[76,81,97,130],"layer-shared":[78],"codebook,":[79],"paired":[80],"LUTHAM,":[82],"ExecuTorch":[84],"runtime":[85],"maps":[87],"the":[88,108,140,150,170,201],"codebook":[89],"on-chip":[91],"L2.":[92],"On":[93],"PASCAL":[94],"VOC":[95],"detection":[96],"ResNet-50":[99],"backbone,":[100],"SHARe-KAN":[101,187],"Int8":[102,161,188],"reaches":[103,183],"9.3X":[104],"storage":[105,182],"compression":[106],"over":[107],"Dense":[109,141,179],"KAN":[110,142,180,198],"baseline":[111],"(6.32":[112],"MB":[113,116],"vs.":[114,127],"58.67":[115],"prediction":[117],"head)":[118],"at":[119,173,175],"2.0":[121],"point":[122],"in-domain":[123],"accuracy":[124],"cost":[125],"(80.22%":[126],"82.22%":[128],"mAP),":[129],"no":[131],"retraining.":[132],"Zero-shot":[133],"transfer":[134],"COCO":[136],"retains":[137],"88.9%":[138],"of":[139,145,169,204],"mAP;":[143],"most":[144],"this":[146],"gap":[147],"comes":[148],"from":[149,158],"VQ":[151],"clustering":[152],"step":[153],"itself,":[154],"further":[156],"quantization":[157],"FP32":[159],"costs":[162],"only":[163],"1.3":[164],"retention":[165],"points.":[166],"The":[167],"value":[168],"approach":[171],"compounds":[172],"scale:":[174],"50":[176],"task":[177],"heads,":[178],"2.9":[184],"GB":[185],"while":[186],"211":[190],"MB,":[191],"13.9X":[193],"reduction":[194],"brings":[196],"multi-expert":[197],"within":[200],"memory":[202],"budgets":[203],"contemporary":[205],"silicon.":[207]},"counts_by_year":[],"updated_date":"2026-04-19T08:26:33.389920","created_date":"2025-12-21T00:00:00"}
