{"id":"https://openalex.org/W4415536773","doi":"https://doi.org/10.1145/3746027.3755443","title":"CrossMind-VL: Multi-Subject Mind-to-Video Decoding with Multimodal LLM Semantic Grounding","display_name":"CrossMind-VL: Multi-Subject Mind-to-Video Decoding with Multimodal LLM Semantic Grounding","publication_year":2025,"publication_date":"2025-10-25","ids":{"openalex":"https://openalex.org/W4415536773","doi":"https://doi.org/10.1145/3746027.3755443"},"language":null,"primary_location":{"id":"doi:10.1145/3746027.3755443","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746027.3755443","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5113203417","display_name":"Xuanliu Zhu","orcid":null},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xuanliu Zhu","raw_affiliation_strings":["Beijing University of Posts and Telecommunications, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0006-5504-1768","affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications, Beijing, China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5119470604","display_name":"Yiqiao Chai","orcid":null},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yiqiao Chai","raw_affiliation_strings":["Beijing University of Posts and Telecommunications, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0003-8433-3864","affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications, Beijing, China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109735217","display_name":"Runnan Li","orcid":"https://orcid.org/0009-0007-2220-7626"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Runnan Li","raw_affiliation_strings":["Beijing University of Posts and Telecommunications, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0007-2220-7626","affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications, Beijing, China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001430743","display_name":"Mingying Lan","orcid":"https://orcid.org/0000-0003-0986-1336"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mingying Lan","raw_affiliation_strings":["Beijing University of Posts and Telecommunications, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0003-0986-1336","affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications, Beijing, China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5015456449","display_name":"Li Gao","orcid":"https://orcid.org/0009-0002-4402-6643"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Li Gao","raw_affiliation_strings":["Beijing University of Posts and Telecommunications, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0002-4402-6643","affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications, Beijing, China","institution_ids":["https://openalex.org/I139759216"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I139759216"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.25245207,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"4446","last_page":"4454"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9933000206947327,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9933000206947327,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9871000051498413,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9848999977111816,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.524399995803833},{"id":"https://openalex.org/keywords/granularity","display_name":"Granularity","score":0.5126000046730042},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.5074999928474426},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.47279998660087585},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.4699999988079071},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.4571000039577484},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.43380001187324524},{"id":"https://openalex.org/keywords/bridge","display_name":"Bridge (graph theory)","score":0.41119998693466187},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.3824999928474426}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8263000249862671},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5659000277519226},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.524399995803833},{"id":"https://openalex.org/C177774035","wikidata":"https://www.wikidata.org/wiki/Q1246948","display_name":"Granularity","level":2,"score":0.5126000046730042},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.5074999928474426},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.47279998660087585},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.4699999988079071},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.4571000039577484},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.43380001187324524},{"id":"https://openalex.org/C100776233","wikidata":"https://www.wikidata.org/wiki/Q2532492","display_name":"Bridge (graph theory)","level":2,"score":0.41119998693466187},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.3824999928474426},{"id":"https://openalex.org/C86034646","wikidata":"https://www.wikidata.org/wiki/Q474311","display_name":"Semantic gap","level":4,"score":0.3695000112056732},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3382999897003174},{"id":"https://openalex.org/C171018156","wikidata":"https://www.wikidata.org/wiki/Q7370306","display_name":"Rotation formalisms in three dimensions","level":2,"score":0.3361000120639801},{"id":"https://openalex.org/C40743351","wikidata":"https://www.wikidata.org/wiki/Q7002049","display_name":"Neural decoding","level":3,"score":0.3253999948501587},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3122999966144562},{"id":"https://openalex.org/C197914299","wikidata":"https://www.wikidata.org/wiki/Q18650","display_name":"Semantic memory","level":3,"score":0.2946999967098236},{"id":"https://openalex.org/C178253425","wikidata":"https://www.wikidata.org/wiki/Q162668","display_name":"Visual perception","level":3,"score":0.29429998993873596},{"id":"https://openalex.org/C119666444","wikidata":"https://www.wikidata.org/wiki/Q5977280","display_name":"Temporal resolution","level":2,"score":0.2906999886035919},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.28999999165534973},{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.2800000011920929},{"id":"https://openalex.org/C2779226451","wikidata":"https://www.wikidata.org/wiki/Q903809","display_name":"Functional magnetic resonance imaging","level":2,"score":0.2800000011920929},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.27869999408721924},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.27230000495910645},{"id":"https://openalex.org/C2777946921","wikidata":"https://www.wikidata.org/wiki/Q7449044","display_name":"Semantic analysis (machine learning)","level":2,"score":0.26089999079704285},{"id":"https://openalex.org/C511192102","wikidata":"https://www.wikidata.org/wiki/Q5156948","display_name":"Comprehension","level":2,"score":0.25189998745918274}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3746027.3755443","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746027.3755443","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":11,"referenced_works":["https://openalex.org/W2007226897","https://openalex.org/W2074299407","https://openalex.org/W2090951183","https://openalex.org/W2102673550","https://openalex.org/W2112180451","https://openalex.org/W2950953090","https://openalex.org/W2997264523","https://openalex.org/W3031175989","https://openalex.org/W4361855028","https://openalex.org/W4386881720","https://openalex.org/W4387969592"],"related_works":[],"abstract_inverted_index":{"Decoding":[0],"dynamic":[1],"visual":[2,64,160],"information":[3],"from":[4,217],"brain":[5],"activity":[6],"remains":[7],"challenging":[8],"due":[9],"to":[10,45,144],"inter-subject":[11],"neural":[12,48,61,224],"heterogeneity,":[13],"limited":[14,43],"per-subject":[15],"data":[16,206],"availability,":[17],"and":[18,28,54,118,186,222],"the":[19,142,158],"substantial":[20],"temporal":[21,40,89],"resolution":[22],"gap":[23],"between":[24],"fMRI":[25],"signals":[26],"(0.5Hz)":[27],"video":[29,139],"dynamics":[30],"(30Hz).":[31],"Current":[32],"approaches":[33],"face":[34],"persistent":[35],"challenges":[36],"in":[37,180],"addressing":[38,75],"these":[39,68,76],"mismatches,":[41],"demonstrate":[42,174],"capacity":[44],"integrate":[46],"subject-specific":[47,107,221],"patterns":[49,148],"with":[50,63,96,109,177],"shared":[51,110,223],"representational":[52],"frameworks,":[53],"lack":[55],"adequate":[56],"semantic":[57,132,140],"granularity":[58],"for":[59,137,207],"aligning":[60],"responses":[62],"content.":[65],"To":[66],"bridge":[67],"gaps,":[69],"we":[70],"propose":[71],"CrossMind-VL,":[72],"a":[73,82,101,120],"framework":[74,193],"limitations":[77],"through":[78,113],"three":[79],"innovations:":[80],"(1)":[81],"Dynamic":[83],"Temporal":[84],"Alignment":[85],"module":[86,124,163],"that":[87,105,125],"resolves":[88,126],"mismatches":[90],"via":[91,130],"exponentially":[92],"decayed":[93],"multi-frame":[94],"fusion":[95],"adaptive":[97],"decay":[98],"coefficients;":[99],"(2)":[100],"Brain":[102],"Mixture-of-Experts":[103],"architecture":[104],"combines":[106],"extractors":[108],"expert":[111],"layers":[112],"parameter-efficient":[114],"tri-modal":[115],"contrastive":[116],"learning;":[117],"(3)":[119],"Multi-perspective":[121],"Semantic":[122],"Hyper-Anchoring":[123],"cross-subject":[127],"attention":[128,147],"bias":[129],"multi-dimensional":[131],"decomposition,":[133],"leveraging":[134],"multimodal":[135],"LLMs":[136],"fine-grained":[138],"extraction-enabling":[141],"model":[143],"match":[145],"individual":[146],"as":[149],"different":[150],"subjects":[151],"naturally":[152],"focus":[153],"on":[154,170],"distinct":[155],"aspects":[156],"of":[157,220],"same":[159],"stimulus.":[161],"This":[162],"boosts":[164],"Top-10/Top-100":[165,181],"retrieval":[166],"by":[167],"17.7%/6.6%.":[168],"Experiments":[169],"two":[171],"video-fMRI":[172],"datasets":[173],"state-of-the-art":[175],"performance,":[176],"39%/30%":[178],"improvements":[179],"accuracy":[182],"over":[183],"single-subject":[184],"baselines":[185],"27%":[187],"gains":[188],"against":[189],"multi-subject":[190],"models.":[191],"The":[192],"exhibits":[194],"remarkable":[195],"few-shot":[196],"adaptability,":[197],"retaining":[198],"97%":[199],"performance":[200],"when":[201],"using":[202],"only":[203],"10%":[204],"training":[205],"new":[208],"subjects.":[209],"Visualization":[210],"analysis":[211],"confirms":[212],"this":[213],"generalization":[214],"capability":[215],"stems":[216],"effective":[218],"disentanglement":[219],"representations.":[225]},"counts_by_year":[],"updated_date":"2026-06-26T08:34:08.712188","created_date":"2025-10-25T00:00:00"}
