{"id":"https://openalex.org/W7161691344","doi":"https://doi.org/10.48550/arxiv.2605.18221","title":"SIREM: Speech-Informed MRI Reconstruction with Learned Sampling","display_name":"SIREM: Speech-Informed MRI Reconstruction with Learned Sampling","publication_year":2026,"publication_date":"2026-05-18","ids":{"openalex":"https://openalex.org/W7161691344","doi":"https://doi.org/10.48550/arxiv.2605.18221"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.18221","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.18221","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.18221","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5136476626","display_name":"Md Hasan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hasan, Md","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136467279","display_name":"Nyvenn de Castro","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Castro, Nyvenn","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060488239","display_name":"Daiqi Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Daiqi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136467517","display_name":"Lukas Mulzer","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mulzer, Lukas","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123202539","display_name":"Jana Hutter","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hutter, Jana","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136475178","display_name":"Jonghye Woo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Woo, Jonghye","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054149506","display_name":"Moritz Zai\u00df","orcid":"https://orcid.org/0000-0001-9780-3616"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zaiss, Moritz","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136462133","display_name":"Andreas Maier","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Maier, Andreas","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5136488627","display_name":"Paula A. Perez-Toro","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Perez-Toro, Paula A.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10403","display_name":"Phonetics and Phonology Research","score":0.1264999955892563,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10403","display_name":"Phonetics and Phonology Research","score":0.1264999955892563,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10241","display_name":"Functional Brain Connectivity Studies","score":0.12460000067949295,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.07569999992847443,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6600000262260437},{"id":"https://openalex.org/keywords/weighting","display_name":"Weighting","score":0.6258000135421753},{"id":"https://openalex.org/keywords/compressed-sensing","display_name":"Compressed sensing","score":0.5256999731063843},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.5077000260353088},{"id":"https://openalex.org/keywords/iterative-reconstruction","display_name":"Iterative reconstruction","score":0.5059000253677368},{"id":"https://openalex.org/keywords/real-time-mri","display_name":"Real-time MRI","score":0.4948999881744385},{"id":"https://openalex.org/keywords/component","display_name":"Component (thermodynamics)","score":0.4860000014305115},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4203000068664551},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.40950000286102295},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.3944999873638153}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7145000100135803},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6600000262260437},{"id":"https://openalex.org/C183115368","wikidata":"https://www.wikidata.org/wiki/Q856577","display_name":"Weighting","level":2,"score":0.6258000135421753},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5303999781608582},{"id":"https://openalex.org/C124851039","wikidata":"https://www.wikidata.org/wiki/Q2665459","display_name":"Compressed sensing","level":2,"score":0.5256999731063843},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.5077000260353088},{"id":"https://openalex.org/C141379421","wikidata":"https://www.wikidata.org/wiki/Q6094427","display_name":"Iterative reconstruction","level":2,"score":0.5059000253677368},{"id":"https://openalex.org/C157787499","wikidata":"https://www.wikidata.org/wiki/Q13479657","display_name":"Real-time MRI","level":3,"score":0.4948999881744385},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.4860000014305115},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4203000068664551},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.40950000286102295},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.3944999873638153},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.39430001378059387},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.36559998989105225},{"id":"https://openalex.org/C174128100","wikidata":"https://www.wikidata.org/wiki/Q846907","display_name":"Spiral (railway)","level":2,"score":0.3637000024318695},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3564000129699707},{"id":"https://openalex.org/C41727105","wikidata":"https://www.wikidata.org/wiki/Q17009718","display_name":"Dynamic contrast-enhanced MRI","level":3,"score":0.3422999978065491},{"id":"https://openalex.org/C43617652","wikidata":"https://www.wikidata.org/wiki/Q7575399","display_name":"Speech production","level":2,"score":0.31029999256134033},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.30649998784065247},{"id":"https://openalex.org/C13895895","wikidata":"https://www.wikidata.org/wiki/Q3270773","display_name":"Speech coding","level":2,"score":0.29910001158714294},{"id":"https://openalex.org/C4069607","wikidata":"https://www.wikidata.org/wiki/Q868732","display_name":"Aliasing","level":3,"score":0.298799991607666},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.2987000048160553},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.2935999929904938},{"id":"https://openalex.org/C159694833","wikidata":"https://www.wikidata.org/wiki/Q2321565","display_name":"Iterative method","level":2,"score":0.2865000069141388},{"id":"https://openalex.org/C20326153","wikidata":"https://www.wikidata.org/wiki/Q7049638","display_name":"Nonuniform sampling","level":3,"score":0.28619998693466187},{"id":"https://openalex.org/C51432778","wikidata":"https://www.wikidata.org/wiki/Q1259145","display_name":"Independent component analysis","level":2,"score":0.28540000319480896},{"id":"https://openalex.org/C137800194","wikidata":"https://www.wikidata.org/wiki/Q11713455","display_name":"Interpolation (computer graphics)","level":3,"score":0.2825999855995178},{"id":"https://openalex.org/C70958404","wikidata":"https://www.wikidata.org/wiki/Q7512728","display_name":"Signal reconstruction","level":4,"score":0.28049999475479126},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.27480000257492065},{"id":"https://openalex.org/C136536468","wikidata":"https://www.wikidata.org/wiki/Q1225894","display_name":"Undersampling","level":2,"score":0.2711000144481659},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.2680000066757202},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.26269999146461487},{"id":"https://openalex.org/C163985040","wikidata":"https://www.wikidata.org/wiki/Q1172399","display_name":"Data acquisition","level":2,"score":0.26100000739097595},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.2515999972820282}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.18221","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.18221","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.18221","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.18221","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Real-time":[0],"magnetic":[1],"resonance":[2],"imaging":[3],"(rtMRI)":[4],"of":[5,11,81,95,142,223],"speech":[6,19,59,72,172,225],"production":[7],"enables":[8],"non-invasive":[9],"visualization":[10],"dynamic":[12],"vocal-tract":[13,69,206],"motion":[14],"and":[15,21,36,45,99,163,183,219],"is":[16,26,67,236],"valuable":[17],"for":[18,214,230],"science":[20],"clinical":[22],"assessment.":[23],"However,":[24],"rtMRI":[25,173,217],"fundamentally":[27],"constrained":[28],"by":[29],"trade-offs":[30],"among":[31],"spatial":[32,105],"resolution,":[33,35],"temporal":[34],"acquisition":[37],"speed,":[38],"often":[39],"leading":[40],"to":[41],"undersampled":[42],"k-space":[43,125,144],"measurements":[44],"degraded":[46],"reconstructions.":[47],"We":[48,127,166],"propose":[49],"SIREM,":[50],"a":[51,61,93,104,130,139,153,188,195],"speech-informed":[52,149,189,216],"MRI":[53,118,161],"reconstruction":[54,190,218],"framework":[55],"that":[56,68,157,192],"uses":[57],"synchronized":[58,224],"as":[60,92,226],"cross-modal":[62],"prior.":[63],"The":[64,108,233],"central":[65],"idea":[66],"configurations":[70],"during":[71],"are":[73],"correlated":[74],"with":[75,148],"the":[76,82,117,170,221],"produced":[77],"acoustics,":[78],"making":[79],"part":[80],"image":[83],"content":[84,122],"predictable":[85],"from":[86,114,123],"audio.":[87],"SIREM":[88,168,186],"models":[89],"each":[90],"frame":[91],"fusion":[94],"an":[96,100,211,227],"audio-driven":[97,159],"component":[98,102],"MRI-driven":[101],"through":[103],"weighting":[106,133],"map.":[107],"audio":[109],"branch":[110,119],"predicts":[111],"articulator-related":[112],"structure":[113],"speech,":[115],"while":[116,202],"reconstructs":[120],"complementary":[121],"measured":[124],"data.":[126],"further":[128],"introduce":[129],"learnable":[131],"soft":[132],"profile":[134],"over":[135],"spiral":[136],"arms,":[137],"enabling":[138],"differentiable":[140],"study":[141],"how":[143],"arm":[145],"usage":[146],"interacts":[147],"fusion.":[150],"This":[151],"yields":[152],"unified":[154],"multimodal":[155,215],"formulation":[156],"combines":[158],"prediction,":[160],"reconstruction,":[162],"sampling":[164],"adaptation.":[165],"evaluate":[167],"on":[169],"USC":[171],"benchmark":[174,213],"against":[175],"standard":[176],"baselines,":[177],"including":[178],"gridding,":[179],"wavelet-based":[180],"compressed":[181],"sensing,":[182],"total":[184],"variation.":[185],"introduces":[187],"paradigm":[191],"operates":[193],"in":[194],"substantially":[196],"higher-throughput":[197],"regime":[198],"than":[199],"iterative":[200],"methods":[201],"preserving":[203],"anatomically":[204],"plausible":[205],"structure.":[207],"These":[208],"results":[209],"establish":[210],"initial":[212],"highlight":[220],"potential":[222],"auxiliary":[228],"prior":[229],"fast":[231],"reconstruction.":[232],"source":[234],"code":[235],"available":[237],"at":[238],"https://github.com/mdhasanai/SIREM":[239]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-20T00:00:00"}
