{"id":"https://openalex.org/W7140107826","doi":"https://doi.org/10.48550/arxiv.2603.19667","title":"Toward High-Fidelity Visual Reconstruction: From EEG-Based Conditioned Generation to Joint-Modal Guided Rebuilding","display_name":"Toward High-Fidelity Visual Reconstruction: From EEG-Based Conditioned Generation to Joint-Modal Guided Rebuilding","publication_year":2026,"publication_date":"2026-03-20","ids":{"openalex":"https://openalex.org/W7140107826","doi":"https://doi.org/10.48550/arxiv.2603.19667"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.19667","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.19667","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.19667","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Gong, Zhijian","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gong, Zhijian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130378292","display_name":"Tianren Yao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yao, Tianren","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130347649","display_name":"Wenjia Dong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dong, Wenjia","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5047941974","display_name":"Xueyuan Xu","orcid":"https://orcid.org/0000-0003-4811-5459"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Xueyuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10429","display_name":"EEG and Brain-Computer Interfaces","score":0.4147999882698059,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10429","display_name":"EEG and Brain-Computer Interfaces","score":0.4147999882698059,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11094","display_name":"Face Recognition and Perception","score":0.11620000004768372,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.08470000326633453,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/electroencephalography","display_name":"Electroencephalography","score":0.6118999719619751},{"id":"https://openalex.org/keywords/chromatic-scale","display_name":"Chromatic scale","score":0.5676000118255615},{"id":"https://openalex.org/keywords/visual-perception","display_name":"Visual perception","score":0.4749000072479248},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.4447999894618988},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4377000033855438},{"id":"https://openalex.org/keywords/human-visual-system-model","display_name":"Human visual system model","score":0.4221000075340271},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.41850000619888306},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.41190001368522644}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6590999960899353},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.630299985408783},{"id":"https://openalex.org/C522805319","wikidata":"https://www.wikidata.org/wiki/Q179965","display_name":"Electroencephalography","level":2,"score":0.6118999719619751},{"id":"https://openalex.org/C196956537","wikidata":"https://www.wikidata.org/wiki/Q202021","display_name":"Chromatic scale","level":2,"score":0.5676000118255615},{"id":"https://openalex.org/C178253425","wikidata":"https://www.wikidata.org/wiki/Q162668","display_name":"Visual perception","level":3,"score":0.4749000072479248},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.44620001316070557},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.4447999894618988},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4377000033855438},{"id":"https://openalex.org/C160086991","wikidata":"https://www.wikidata.org/wiki/Q5939193","display_name":"Human visual system model","level":3,"score":0.4221000075340271},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.41850000619888306},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.41190001368522644},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.35749998688697815},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.3531999886035919},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.3531000018119812},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.33070001006126404},{"id":"https://openalex.org/C19768560","wikidata":"https://www.wikidata.org/wiki/Q320727","display_name":"Dependency (UML)","level":2,"score":0.3075000047683716},{"id":"https://openalex.org/C141379421","wikidata":"https://www.wikidata.org/wiki/Q6094427","display_name":"Iterative reconstruction","level":2,"score":0.3059999942779541},{"id":"https://openalex.org/C64876066","wikidata":"https://www.wikidata.org/wiki/Q5141226","display_name":"Cognitive neuroscience of visual object recognition","level":3,"score":0.2948000133037567},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.2660999894142151},{"id":"https://openalex.org/C2780103172","wikidata":"https://www.wikidata.org/wiki/Q1309721","display_name":"Visual Objects","level":3,"score":0.2572999894618988},{"id":"https://openalex.org/C169900460","wikidata":"https://www.wikidata.org/wiki/Q2200417","display_name":"Cognition","level":2,"score":0.25600001215934753},{"id":"https://openalex.org/C18555067","wikidata":"https://www.wikidata.org/wiki/Q8375051","display_name":"Joint (building)","level":2,"score":0.2547999918460846}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.19667","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.19667","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.19667","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.19667","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Human":[0],"visual":[1,7,26,81],"reconstruction":[2],"aims":[3],"to":[4,52,107,121,131],"reconstruct":[5],"fine-grained":[6],"stimuli":[8],"based":[9],"on":[10,140],"subject-provided":[11],"descriptions":[12],"and":[13,33,67,99,125,162],"corresponding":[14],"neural":[15],"signals.":[16],"As":[17],"a":[18,89,116],"widely":[19],"adopted":[20],"modality,":[21],"Electroencephalography":[22],"(EEG)":[23],"captures":[24],"rich":[25,65],"cognition":[27],"information,":[28],"encompassing":[29],"complex":[30],"spatial":[31,66,160],"relationships":[32],"chromatic":[34,68,163],"details":[35,69],"within":[36],"scenes.":[37],"However,":[38],"current":[39],"approaches":[40],"are":[41],"deeply":[42],"coupled":[43],"with":[44,54],"an":[45],"alignment":[46],"framework":[47],"that":[48,72,145],"forces":[49],"EEG":[50,71,98,118],"features":[51],"align":[53],"text":[55,100],"or":[56],"image":[57,76,129],"semantic":[58],"representation.":[59],"The":[60],"dependency":[61],"may":[62],"condense":[63],"the":[64,133,141],"in":[70,158],"achieved":[73],"mere":[74],"conditioned":[75],"generation":[77],"rather":[78],"than":[79],"high-fidelity":[80],"reconstruction.":[82,112],"To":[83],"address":[84],"this":[85],"limitation,":[86],"we":[87],"propose":[88],"novel":[90],"Joint-Modal":[91],"Visual":[92],"Reconstruction":[93],"(JMVR)":[94],"framework.":[95],"It":[96,113],"treats":[97],"as":[101],"independent":[102],"modalities":[103],"for":[104,111],"joint":[105],"learning":[106],"preserve":[108],"EEG-specific":[109],"information":[110],"further":[114],"employs":[115],"multi-scale":[117],"encoding":[119],"strategy":[120],"capture":[122],"both":[123],"fine-":[124],"coarse-grained":[126],"features,":[127],"alongside":[128],"augmentation":[130],"enhance":[132],"recovery":[134],"of":[135],"perceptual":[136],"details.":[137],"Extensive":[138],"experiments":[139],"THINGS-EEG":[142],"dataset":[143],"demonstrate":[144],"JMVR":[146],"achieves":[147],"SOTA":[148],"performance":[149],"against":[150],"six":[151],"baseline":[152],"methods,":[153],"specifically":[154],"exhibiting":[155],"superior":[156],"capabilities":[157],"modeling":[159],"structure":[161],"fidelity.":[164]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-24T00:00:00"}
