{"id":"https://openalex.org/W7133226483","doi":"https://doi.org/10.1109/jiot.2026.3669716","title":"MSET: Multimodal Semantic-Enhanced Real-World Beam Prediction via Temporal Modeling With Visual Foundation Models","display_name":"MSET: Multimodal Semantic-Enhanced Real-World Beam Prediction via Temporal Modeling With Visual Foundation Models","publication_year":2026,"publication_date":"2026-03-02","ids":{"openalex":"https://openalex.org/W7133226483","doi":"https://doi.org/10.1109/jiot.2026.3669716"},"language":null,"primary_location":{"id":"doi:10.1109/jiot.2026.3669716","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jiot.2026.3669716","pdf_url":null,"source":{"id":"https://openalex.org/S2480266640","display_name":"IEEE Internet of Things Journal","issn_l":"2327-4662","issn":["2327-4662","2372-2541"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Internet of Things Journal","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Feixiang Liu","orcid":"https://orcid.org/0009-0005-9944-1862"},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Feixiang Liu","raw_affiliation_strings":["State Key Laboratory of Integrated Service Networks, Xidian University, Xi&#x2019;an, Shaanxi, China"],"raw_orcid":"https://orcid.org/0009-0005-9944-1862","affiliations":[{"raw_affiliation_string":"State Key Laboratory of Integrated Service Networks, Xidian University, Xi&#x2019;an, Shaanxi, China","institution_ids":["https://openalex.org/I149594827"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127780571","display_name":"Xiaohui Li","orcid":null},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaohui Li","raw_affiliation_strings":["Guangzhou Institute of Technology, Xidian University, Guangzhou, China"],"raw_orcid":"https://orcid.org/0000-0002-0589-3712","affiliations":[{"raw_affiliation_string":"Guangzhou Institute of Technology, Xidian University, Guangzhou, China","institution_ids":["https://openalex.org/I149594827"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127783379","display_name":"Wenhui Gao","orcid":null},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenhui Gao","raw_affiliation_strings":["State Key Laboratory of Integrated Service Networks, Xidian University, Xi&#x2019;an, Shaanxi, China"],"raw_orcid":"https://orcid.org/0009-0009-4852-9265","affiliations":[{"raw_affiliation_string":"State Key Laboratory of Integrated Service Networks, Xidian University, Xi&#x2019;an, Shaanxi, China","institution_ids":["https://openalex.org/I149594827"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5121608967","display_name":"Jiaqing Xiong","orcid":null},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiaqing Xiong","raw_affiliation_strings":["Guangzhou Institute of Technology, Xidian University, Guangzhou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Guangzhou Institute of Technology, Xidian University, Guangzhou, China","institution_ids":["https://openalex.org/I149594827"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064501316","display_name":"Guanchong Niu","orcid":"https://orcid.org/0000-0002-0571-2571"},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guanchong Niu","raw_affiliation_strings":["Guangzhou Institute of Technology, Xidian University, Guangzhou, China"],"raw_orcid":"https://orcid.org/0000-0002-0571-2571","affiliations":[{"raw_affiliation_string":"Guangzhou Institute of Technology, Xidian University, Guangzhou, China","institution_ids":["https://openalex.org/I149594827"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5082908618","display_name":"Chung Shue Chen","orcid":"https://orcid.org/0000-0002-7702-2369"},"institutions":[{"id":"https://openalex.org/I4210149358","display_name":"Nokia (France)","ror":"https://ror.org/04kwfkk85","country_code":"FR","type":"company","lineage":["https://openalex.org/I2738502077","https://openalex.org/I4210149358"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Chung Shue Chen","raw_affiliation_strings":["Department of Machine Learning and Systems, Nokia Bell Labs, Massy, France"],"raw_orcid":"https://orcid.org/0000-0002-7702-2369","affiliations":[{"raw_affiliation_string":"Department of Machine Learning and Systems, Nokia Bell Labs, Massy, France","institution_ids":["https://openalex.org/I4210149358"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.26335307,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"13","issue":"10","first_page":"22130","last_page":"22148"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.350600004196167,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.350600004196167,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.14239999651908875,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10799","display_name":"Data Visualization and Analytics","score":0.06909999996423721,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.6323999762535095},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5766000151634216},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.527899980545044},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.4359000027179718},{"id":"https://openalex.org/keywords/prior-probability","display_name":"Prior probability","score":0.43369999527931213},{"id":"https://openalex.org/keywords/context-model","display_name":"Context model","score":0.4287000000476837},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.42399999499320984},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.3919999897480011},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.37630000710487366},{"id":"https://openalex.org/keywords/domain-knowledge","display_name":"Domain knowledge","score":0.3458999991416931}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8598999977111816},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.6323999762535095},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6262999773025513},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5766000151634216},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.527899980545044},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.4359000027179718},{"id":"https://openalex.org/C177769412","wikidata":"https://www.wikidata.org/wiki/Q278090","display_name":"Prior probability","level":3,"score":0.43369999527931213},{"id":"https://openalex.org/C183322885","wikidata":"https://www.wikidata.org/wiki/Q17007702","display_name":"Context model","level":3,"score":0.4287000000476837},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.42739999294281006},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.42399999499320984},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.39899998903274536},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.3919999897480011},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.37630000710487366},{"id":"https://openalex.org/C207685749","wikidata":"https://www.wikidata.org/wiki/Q2088941","display_name":"Domain knowledge","level":2,"score":0.3458999991416931},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.3398999869823456},{"id":"https://openalex.org/C82990744","wikidata":"https://www.wikidata.org/wiki/Q166194","display_name":"RGB color model","level":2,"score":0.3328999876976013},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.3285999894142151},{"id":"https://openalex.org/C64754055","wikidata":"https://www.wikidata.org/wiki/Q7574053","display_name":"Spatial contextual awareness","level":2,"score":0.32820001244544983},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.31709998846054077},{"id":"https://openalex.org/C93959086","wikidata":"https://www.wikidata.org/wiki/Q6888345","display_name":"Model selection","level":2,"score":0.313400000333786},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.30559998750686646},{"id":"https://openalex.org/C160234255","wikidata":"https://www.wikidata.org/wiki/Q812535","display_name":"Bayesian inference","level":3,"score":0.28769999742507935},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.2849999964237213},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.2831000089645386},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.2800000011920929},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.2754000127315521},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.2628999948501587},{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.25929999351501465},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.2565000057220459},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.2563000023365021},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.2533000111579895}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/jiot.2026.3669716","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jiot.2026.3669716","pdf_url":null,"source":{"id":"https://openalex.org/S2480266640","display_name":"IEEE Internet of Things Journal","issn_l":"2327-4662","issn":["2327-4662","2372-2541"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Internet of Things Journal","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/13","score":0.4258182644844055,"display_name":"Climate action"}],"awards":[{"id":"https://openalex.org/G1595715893","display_name":null,"funder_award_id":"B08038","funder_id":"https://openalex.org/F4320327912","funder_display_name":"Higher Education Discipline Innovation Project"},{"id":"https://openalex.org/G1948771034","display_name":null,"funder_award_id":"2018YFB1802004","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"}],"funders":[{"id":"https://openalex.org/F4320327912","display_name":"Higher Education Discipline Innovation Project","ror":null},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"While":[0],"machine":[1],"learning":[2],"(ML)":[3],"has":[4],"been":[5],"explored":[6],"for":[7,91],"beam":[8],"prediction,":[9],"many":[10],"methods":[11],"remain":[12],"constrained":[13],"by":[14,98],"single-modality":[15],"inputs,":[16],"shallow":[17],"temporal":[18],"modeling,":[19],"and":[20,25,70,76,83,119,138,177,194,201],"limited":[21],"robustness":[22],"to":[23,161,170],"interference":[24],"domain":[26],"shift.":[27],"We":[28,166],"introduce":[29],"Multimodal":[30],"Semantic-Enhanced":[31],"Real-World":[32],"Beam":[33],"Prediction":[34],"via":[35,147],"Temporal":[36,101],"Modeling":[37],"with":[38,50,105,158,173],"Visual":[39],"Foundation":[40],"Models":[41],"(MSET),":[42],"a":[43,54,65,84,99,131,142,148,154],"multimodal":[44],"framework":[45],"that":[46],"couples":[47],"visual":[48,59],"semantics":[49],"positional":[51,159],"priors":[52],"in":[53,122],"causal,":[55],"lightweight":[56,85],"design.":[57],"A":[58],"foundation":[60],"model":[61,151],"(VFM),":[62],"instantiated":[63],"as":[64],"Swin-Transformer,":[66],"learns":[67],"rich":[68],"spatial":[69],"semantic":[71],"cues":[72],"from":[73],"RGB":[74],"images":[75],"Segment":[77],"Anything":[78],"Model":[79],"(SAM)-derived":[80],"region":[81],"masks,":[82],"ResNet-18":[86],"student":[87],"distills":[88],"this":[89],"knowledge":[90],"efficient":[92],"inference.":[93],"Short-horizon":[94],"dynamics":[95],"are":[96],"captured":[97],"causal":[100],"Convolutional":[102],"Network":[103],"(TCN)":[104],"an":[106],"adaptive":[107],"receptive":[108],"field,":[109],"whose":[110],"volatility-driven":[111],"depth":[112],"gate":[113],"expands":[114],"context":[115],"under":[116,181,204],"motion":[117],"spikes":[118],"contracts":[120],"it":[121],"calm":[123],"periods.":[124],"On":[125],"top":[126],"of":[127],"semantic-aware":[128],"frame":[129],"embeddings,":[130],"Temporally":[132],"Aware":[133],"Cross-Attention":[134],"(TACA)":[135],"aligns":[136],"original":[137],"semantic-enhanced":[139],"tokens,":[140],"while":[141],"Mambaconditioned":[143],"GPS":[144],"prior,":[145],"implemented":[146],"selective":[149],"state-space":[150],"(SSM),":[152],"issues":[153],"location-conditioned":[155],"single":[156],"query":[157],"bias":[160],"attend":[162],"the":[163],"fused":[164],"tokens.":[165],"further":[167],"extend":[168],"inference":[169],"dense":[171],"deployments":[172],"multiple":[174],"proximate":[175],"candidates":[176],"address":[178],"target":[179],"selection":[180],"ambiguity.":[182],"Experiments":[183],"on":[184],"DeepSense":[185],"6G":[186],"dataset":[187],"show":[188],"consistent":[189],"Top-k":[190],"improvements":[191],"across":[192],"single/multi-target":[193],"day/night":[195],"scenarios,":[196],"indicating":[197],"reduced":[198],"sweep":[199],"reliance":[200],"strong":[202],"generalization":[203],"realistic":[205],"V2I":[206],"dynamics.":[207]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-03T00:00:00"}
