{"id":"https://openalex.org/W7106015660","doi":"https://doi.org/10.48550/arxiv.2511.11930","title":"Enhancing XR Auditory Realism via Multimodal Scene-Aware Acoustic Rendering","display_name":"Enhancing XR Auditory Realism via Multimodal Scene-Aware Acoustic Rendering","publication_year":2025,"publication_date":"2025-11-14","ids":{"openalex":"https://openalex.org/W7106015660","doi":"https://doi.org/10.48550/arxiv.2511.11930"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2511.11930","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2511.11930","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2511.11930","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Xu, Tianyu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Tianyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Li, Jihan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Jihan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Zu, Penghe","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zu, Penghe","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Sahay, Pranav","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sahay, Pranav","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Kim, Maruchi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kim, Maruchi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Obeng-Marnu, Jack","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Obeng-Marnu, Jack","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Miller, Farley","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Miller, Farley","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Qian, Xun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qian, Xun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Passarella, Katrina","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Passarella, Katrina","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Rachumalla, Mahitha","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rachumalla, Mahitha","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Nongpiur, Rajeev","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nongpiur, Rajeev","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Shin, D.","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shin, D.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":12,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10283","display_name":"Hearing Loss and Rehabilitation","score":0.4350000023841858,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10283","display_name":"Hearing Loss and Rehabilitation","score":0.4350000023841858,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.23250000178813934,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.04670000076293945,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/rendering","display_name":"Rendering (computer graphics)","score":0.8489000201225281},{"id":"https://openalex.org/keywords/virtual-reality","display_name":"Virtual reality","score":0.5823000073432922},{"id":"https://openalex.org/keywords/auditory-display","display_name":"Auditory display","score":0.5092999935150146},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.4325999915599823},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.3765000104904175},{"id":"https://openalex.org/keywords/sonification","display_name":"Sonification","score":0.3686000108718872},{"id":"https://openalex.org/keywords/loudspeaker","display_name":"Loudspeaker","score":0.3499999940395355},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.3476000130176544},{"id":"https://openalex.org/keywords/surround-sound","display_name":"Surround sound","score":0.3346000015735626}],"concepts":[{"id":"https://openalex.org/C205711294","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Rendering (computer graphics)","level":2,"score":0.8489000201225281},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7350000143051147},{"id":"https://openalex.org/C194969405","wikidata":"https://www.wikidata.org/wiki/Q170519","display_name":"Virtual reality","level":2,"score":0.5823000073432922},{"id":"https://openalex.org/C171179263","wikidata":"https://www.wikidata.org/wiki/Q4820026","display_name":"Auditory display","level":2,"score":0.5092999935150146},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.46639999747276306},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.4325999915599823},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.3765000104904175},{"id":"https://openalex.org/C91607612","wikidata":"https://www.wikidata.org/wiki/Q1416058","display_name":"Sonification","level":2,"score":0.3686000108718872},{"id":"https://openalex.org/C157138929","wikidata":"https://www.wikidata.org/wiki/Q570","display_name":"Loudspeaker","level":2,"score":0.3499999940395355},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.3476000130176544},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3467000126838684},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.335099995136261},{"id":"https://openalex.org/C2780544925","wikidata":"https://www.wikidata.org/wiki/Q569874","display_name":"Surround sound","level":3,"score":0.3346000015735626},{"id":"https://openalex.org/C3020799230","wikidata":"https://www.wikidata.org/wiki/Q160289","display_name":"Auditory perception","level":3,"score":0.3077999949455261},{"id":"https://openalex.org/C38956757","wikidata":"https://www.wikidata.org/wiki/Q716215","display_name":"Audio feedback","level":2,"score":0.3075000047683716},{"id":"https://openalex.org/C153715457","wikidata":"https://www.wikidata.org/wiki/Q254183","display_name":"Augmented reality","level":2,"score":0.2962000072002411},{"id":"https://openalex.org/C178432105","wikidata":"https://www.wikidata.org/wiki/Q2182127","display_name":"Room acoustics","level":3,"score":0.28850001096725464},{"id":"https://openalex.org/C95851461","wikidata":"https://www.wikidata.org/wiki/Q468809","display_name":"Reverberation","level":2,"score":0.2849999964237213},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.28029999136924744},{"id":"https://openalex.org/C167310288","wikidata":"https://www.wikidata.org/wiki/Q7564808","display_name":"Sound quality","level":2,"score":0.2800999879837036},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.2768999934196472},{"id":"https://openalex.org/C199068039","wikidata":"https://www.wikidata.org/wiki/Q574523","display_name":"Immersion (mathematics)","level":2,"score":0.27639999985694885},{"id":"https://openalex.org/C89505385","wikidata":"https://www.wikidata.org/wiki/Q47146","display_name":"User interface","level":2,"score":0.27459999918937683},{"id":"https://openalex.org/C68236139","wikidata":"https://www.wikidata.org/wiki/Q765652","display_name":"Sound localization","level":2,"score":0.27390000224113464},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.27309998869895935},{"id":"https://openalex.org/C73208851","wikidata":"https://www.wikidata.org/wiki/Q5157303","display_name":"Computational auditory scene analysis","level":2,"score":0.26980000734329224},{"id":"https://openalex.org/C38129911","wikidata":"https://www.wikidata.org/wiki/Q4820038","display_name":"Auditory scene analysis","level":3,"score":0.26919999718666077},{"id":"https://openalex.org/C499572226","wikidata":"https://www.wikidata.org/wiki/Q1937950","display_name":"Sound design","level":3,"score":0.2689000070095062},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.26339998841285706},{"id":"https://openalex.org/C72279823","wikidata":"https://www.wikidata.org/wiki/Q1139726","display_name":"Impulse response","level":2,"score":0.25609999895095825},{"id":"https://openalex.org/C48007421","wikidata":"https://www.wikidata.org/wiki/Q676252","display_name":"Motion capture","level":3,"score":0.25540000200271606},{"id":"https://openalex.org/C113364801","wikidata":"https://www.wikidata.org/wiki/Q26674","display_name":"High fidelity","level":2,"score":0.2533999979496002}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2511.11930","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2511.11930","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2511.11930","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2511.11930","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"In":[0],"Extended":[1],"Reality":[2],"(XR),":[3],"rendering":[4,25],"sound":[5,63,132],"that":[6,45,59],"accurately":[7],"simulates":[8],"real-world":[9],"acoustics":[10],"is":[11],"pivotal":[12],"in":[13,146],"creating":[14],"lifelike":[15],"and":[16,42,87,131,144],"believable":[17],"virtual":[18],"experiences.":[19],"However,":[20],"existing":[21],"XR":[22,148],"spatial":[23],"audio":[24],"methods":[26],"often":[27],"struggle":[28],"with":[29],"real-time":[30,80],"adaptation":[31],"to":[32,67,105],"diverse":[33],"physical":[34,69],"scenes,":[35],"causing":[36],"a":[37,55,73,107],"sensory":[38],"mismatch":[39],"between":[40],"visual":[41],"auditory":[43,149],"cues":[44],"disrupts":[46],"user":[47],"immersion.":[48],"To":[49],"address":[50],"this,":[51],"we":[52],"introduce":[53],"SAMOSA,":[54],"novel":[56],"on-device":[57],"system":[58,104,117],"renders":[60],"spatially":[61],"accurate":[62],"by":[64,78],"dynamically":[65],"adapting":[66],"its":[68],"environment.":[70],"SAMOSA":[71],"leverages":[72],"synergistic":[74],"multimodal":[75],"scene":[76,100],"representation":[77,93],"fusing":[79],"estimations":[81],"of":[82],"room":[83,129],"geometry,":[84],"surface":[85],"materials,":[86],"semantic-driven":[88],"acoustic":[89,97,122],"context.":[90],"This":[91],"rich":[92],"then":[94],"enables":[95],"efficient":[96],"calibration":[98],"via":[99],"priors,":[101],"allowing":[102],"the":[103],"synthesize":[106],"highly":[108],"realistic":[109],"Room":[110],"Impulse":[111],"Response":[112],"(RIR).":[113],"We":[114],"validate":[115],"our":[116],"through":[118],"technical":[119],"evaluation":[120,137],"using":[121],"metrics":[123],"for":[124],"RIR":[125],"synthesis":[126],"across":[127],"various":[128],"configurations":[130],"types,":[133],"alongside":[134],"an":[135],"expert":[136],"(N=12).":[138],"Evaluation":[139],"results":[140],"demonstrate":[141],"SAMOSA's":[142],"feasibility":[143],"efficacy":[145],"enhancing":[147],"realism.":[150]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-11-19T00:00:00"}
