{"id":"https://openalex.org/W7155218047","doi":"https://doi.org/10.48550/arxiv.2604.18610","title":"SpikeMLLM: Spike-based Multimodal Large Language Models via Modality-Specific Temporal Scales and Temporal Compression","display_name":"SpikeMLLM: Spike-based Multimodal Large Language Models via Modality-Specific Temporal Scales and Temporal Compression","publication_year":2026,"publication_date":"2026-04-13","ids":{"openalex":"https://openalex.org/W7155218047","doi":"https://doi.org/10.48550/arxiv.2604.18610"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.18610","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.18610","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.18610","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5134250580","display_name":"Han Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Han","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134255891","display_name":"Zhiyong Qin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qin, Zhiyong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134285236","display_name":"Di Shang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shang, Di","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134287949","display_name":"Jiahong Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Jiahong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134259303","display_name":"Xuerui Qiu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qiu, Xuerui","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134263375","display_name":"Bo Lei","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lei, Bo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134248103","display_name":"Tiejun Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Tiejun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134356564","display_name":"Bo Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Bo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134235522","display_name":"Guoqi Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Guoqi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.5073000192642212,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.5073000192642212,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.2257000058889389,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.09650000184774399,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/neuromorphic-engineering","display_name":"Neuromorphic engineering","score":0.4715000092983246},{"id":"https://openalex.org/keywords/efficient-energy-use","display_name":"Efficient energy use","score":0.45590001344680786},{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.4131999909877777},{"id":"https://openalex.org/keywords/energy-consumption","display_name":"Energy consumption","score":0.4092000126838684},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.4075999855995178},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.4050999879837036},{"id":"https://openalex.org/keywords/data-compression","display_name":"Data compression","score":0.396699994802475},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.3953000009059906},{"id":"https://openalex.org/keywords/energy","display_name":"Energy (signal processing)","score":0.3522000014781952},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.3174000084400177}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8198999762535095},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48069998621940613},{"id":"https://openalex.org/C151927369","wikidata":"https://www.wikidata.org/wiki/Q1981312","display_name":"Neuromorphic engineering","level":3,"score":0.4715000092983246},{"id":"https://openalex.org/C2742236","wikidata":"https://www.wikidata.org/wiki/Q924713","display_name":"Efficient energy use","level":2,"score":0.45590001344680786},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.4131999909877777},{"id":"https://openalex.org/C2780165032","wikidata":"https://www.wikidata.org/wiki/Q16869822","display_name":"Energy consumption","level":2,"score":0.4092000126838684},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.4075999855995178},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.4050999879837036},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.396699994802475},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3953000009059906},{"id":"https://openalex.org/C186370098","wikidata":"https://www.wikidata.org/wiki/Q442787","display_name":"Energy (signal processing)","level":2,"score":0.3522000014781952},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.3174000084400177},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3138999938964844},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.3050000071525574},{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.3046000003814697},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.302700012922287},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.30059999227523804},{"id":"https://openalex.org/C177774035","wikidata":"https://www.wikidata.org/wiki/Q1246948","display_name":"Granularity","level":2,"score":0.2996000051498413},{"id":"https://openalex.org/C2781390188","wikidata":"https://www.wikidata.org/wiki/Q25203449","display_name":"Spike (software development)","level":2,"score":0.29319998621940613},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.29170000553131104},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.28870001435279846},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.2838999927043915},{"id":"https://openalex.org/C161765866","wikidata":"https://www.wikidata.org/wiki/Q184748","display_name":"Codec","level":2,"score":0.27880001068115234},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.2770000100135803},{"id":"https://openalex.org/C119666444","wikidata":"https://www.wikidata.org/wiki/Q5977280","display_name":"Temporal resolution","level":2,"score":0.2759999930858612},{"id":"https://openalex.org/C77637269","wikidata":"https://www.wikidata.org/wiki/Q7002051","display_name":"Neural coding","level":2,"score":0.2703000009059906},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.27000001072883606},{"id":"https://openalex.org/C180016635","wikidata":"https://www.wikidata.org/wiki/Q2712821","display_name":"Compression (physics)","level":2,"score":0.2676999866962433},{"id":"https://openalex.org/C2984118289","wikidata":"https://www.wikidata.org/wiki/Q29954","display_name":"Power consumption","level":3,"score":0.26249998807907104},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.2581000030040741},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.25690001249313354},{"id":"https://openalex.org/C11731999","wikidata":"https://www.wikidata.org/wiki/Q9067355","display_name":"Spiking neural network","level":3,"score":0.2563999891281128},{"id":"https://openalex.org/C94835093","wikidata":"https://www.wikidata.org/wiki/Q3113333","display_name":"Data compression ratio","level":5,"score":0.2540999948978424},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2508000135421753}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.18610","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.18610","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"Preprint"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.18610","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.18610","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","score":0.8950455784797668,"display_name":"Affordable and clean energy"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Multimodal":[0],"Large":[1],"Language":[2],"Models":[3],"(MLLMs)":[4],"have":[5],"achieved":[6],"remarkable":[7],"progress":[8],"but":[9],"incur":[10],"substantial":[11],"computational":[12],"overhead":[13],"and":[14,57,85,97,135,144,162],"energy":[15,35],"consumption":[16],"during":[17],"inference,":[18],"limiting":[19],"deployment":[20],"in":[21,80],"resource-constrained":[22],"environments.":[23],"Spiking":[24],"Neural":[25],"Networks":[26],"(SNNs),":[27],"with":[28,129],"their":[29],"sparse":[30],"event-driven":[31],"computation,":[32],"offer":[33],"inherent":[34],"efficiency":[36,166],"advantages":[37],"on":[38,110,142],"neuromorphic":[39],"hardware,":[40],"yet":[41],"extending":[42],"them":[43],"to":[44,107,138,154,168],"MLLMs":[45,113],"faces":[46],"two":[47],"key":[48],"challenges:":[49],"heterogeneous":[50],"modalities":[51],"make":[52],"uniform":[53],"spike":[54],"encoding":[55],"insufficient,":[56],"high-resolution":[58],"image":[59],"inputs":[60],"amplify":[61],"timestep":[62,103,126],"unfolding":[63],"overhead.":[64],"We":[65,146],"propose":[66],"SpikeMLLM,":[67],"the":[68,81,139,155,179],"first":[69],"spike-based":[70],"framework":[71],"for":[72,102,184],"MLLMs,":[73],"which":[74],"unifies":[75],"existing":[76],"ANN":[77],"quantization":[78],"methods":[79],"spiking":[82],"representation":[83],"space":[84],"incorporates":[86],"Modality-Specific":[87],"Temporal":[88],"Scales":[89],"(MSTS)":[90],"guided":[91],"by":[92],"Modality":[93],"Evolution":[94],"Discrepancy":[95],"(MED)":[96],"Temporally":[98],"Compressed":[99],"LIF":[100],"(TC-LIF)":[101],"compression":[104,127],"from":[105],"T=L-1":[106],"T=log2(L)-1.":[108],"Experiments":[109],"four":[111],"representative":[112],"across":[114],"diverse":[115],"multimodal":[116,186],"benchmarks":[117],"show":[118],"that":[119],"SpikeMLLM":[120],"maintains":[121],"near-lossless":[122],"performance":[123],"under":[124,173],"aggressive":[125],"(Tv/Tt=3/4),":[128],"average":[130],"gaps":[131],"of":[132,181],"only":[133],"0.72%":[134],"1.19%":[136],"relative":[137,167],"FP16":[140,170],"baseline":[141,172],"InternVL2-8B":[143],"Qwen2VL-72B.":[145],"further":[147],"develop":[148],"a":[149,174],"dedicated":[150],"RTL":[151],"accelerator":[152],"tailored":[153],"spike-driven":[156],"datapath,":[157],"observing":[158],"9.06x":[159],"higher":[160],"throughput":[161],"25.8x":[163],"better":[164],"power":[165],"an":[169],"GPU":[171],"deployment-oriented":[175],"co-design":[176,183],"setting,":[177],"suggesting":[178],"promise":[180],"algorithm-hardware":[182],"efficient":[185],"intelligence.":[187]},"counts_by_year":[],"updated_date":"2026-07-01T06:00:48.157686","created_date":"2026-04-23T00:00:00"}
