{"id":"https://openalex.org/W4416250004","doi":"https://doi.org/10.1109/waspaa66052.2025.11230976","title":"Room Impulse Response Generation Conditioned on Acoustic Parameters","display_name":"Room Impulse Response Generation Conditioned on Acoustic Parameters","publication_year":2025,"publication_date":"2025-10-12","ids":{"openalex":"https://openalex.org/W4416250004","doi":"https://doi.org/10.1109/waspaa66052.2025.11230976"},"language":null,"primary_location":{"id":"doi:10.1109/waspaa66052.2025.11230976","is_oa":false,"landing_page_url":"https://doi.org/10.1109/waspaa66052.2025.11230976","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5119982821","display_name":"Silvia Arellano","orcid":null},"institutions":[{"id":"https://openalex.org/I86987016","display_name":"KTH Royal Institute of Technology","ror":"https://ror.org/026vcq606","country_code":"SE","type":"education","lineage":["https://openalex.org/I86987016"]}],"countries":["SE"],"is_corresponding":true,"raw_author_name":"Silvia Arellano","raw_affiliation_strings":["KTH Royal Institute of Technology"],"affiliations":[{"raw_affiliation_string":"KTH Royal Institute of Technology","institution_ids":["https://openalex.org/I86987016"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004645518","display_name":"Chunghsin Yeh","orcid":null},"institutions":[{"id":"https://openalex.org/I116121188","display_name":"Dolby (Netherlands)","ror":"https://ror.org/02kb3q578","country_code":"NL","type":"company","lineage":["https://openalex.org/I116121188","https://openalex.org/I4210093996"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Chunghsin Yeh","raw_affiliation_strings":["Dolby Laboratories"],"affiliations":[{"raw_affiliation_string":"Dolby Laboratories","institution_ids":["https://openalex.org/I116121188"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010738521","display_name":"Gautam Bhattacharya","orcid":"https://orcid.org/0000-0001-6600-0300"},"institutions":[{"id":"https://openalex.org/I116121188","display_name":"Dolby (Netherlands)","ror":"https://ror.org/02kb3q578","country_code":"NL","type":"company","lineage":["https://openalex.org/I116121188","https://openalex.org/I4210093996"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Gautam Bhattacharya","raw_affiliation_strings":["Dolby Laboratories"],"affiliations":[{"raw_affiliation_string":"Dolby Laboratories","institution_ids":["https://openalex.org/I116121188"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5007602555","display_name":"Daniel Arteaga","orcid":"https://orcid.org/0000-0002-9896-4427"},"institutions":[{"id":"https://openalex.org/I116121188","display_name":"Dolby (Netherlands)","ror":"https://ror.org/02kb3q578","country_code":"NL","type":"company","lineage":["https://openalex.org/I116121188","https://openalex.org/I4210093996"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Daniel Arteaga","raw_affiliation_strings":["Dolby Laboratories"],"affiliations":[{"raw_affiliation_string":"Dolby Laboratories","institution_ids":["https://openalex.org/I116121188"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5119982821"],"corresponding_institution_ids":["https://openalex.org/I86987016"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.45142497,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.5710999965667725,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.5710999965667725,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10283","display_name":"Hearing Loss and Rehabilitation","score":0.30250000953674316,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.014800000004470348,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reverberation","display_name":"Reverberation","score":0.6898000240325928},{"id":"https://openalex.org/keywords/autoregressive-model","display_name":"Autoregressive model","score":0.5454999804496765},{"id":"https://openalex.org/keywords/impulse-response","display_name":"Impulse response","score":0.5062000155448914},{"id":"https://openalex.org/keywords/headphones","display_name":"Headphones","score":0.3984000086784363},{"id":"https://openalex.org/keywords/codec","display_name":"Codec","score":0.39820000529289246},{"id":"https://openalex.org/keywords/active-listening","display_name":"Active listening","score":0.350600004196167},{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.3391000032424927},{"id":"https://openalex.org/keywords/usability","display_name":"Usability","score":0.3352000117301941},{"id":"https://openalex.org/keywords/cepstrum","display_name":"Cepstrum","score":0.32280001044273376}],"concepts":[{"id":"https://openalex.org/C95851461","wikidata":"https://www.wikidata.org/wiki/Q468809","display_name":"Reverberation","level":2,"score":0.6898000240325928},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6328999996185303},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.564300000667572},{"id":"https://openalex.org/C159877910","wikidata":"https://www.wikidata.org/wiki/Q2202883","display_name":"Autoregressive model","level":2,"score":0.5454999804496765},{"id":"https://openalex.org/C72279823","wikidata":"https://www.wikidata.org/wiki/Q1139726","display_name":"Impulse response","level":2,"score":0.5062000155448914},{"id":"https://openalex.org/C2781258422","wikidata":"https://www.wikidata.org/wiki/Q186819","display_name":"Headphones","level":2,"score":0.3984000086784363},{"id":"https://openalex.org/C161765866","wikidata":"https://www.wikidata.org/wiki/Q184748","display_name":"Codec","level":2,"score":0.39820000529289246},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.3864000141620636},{"id":"https://openalex.org/C177291462","wikidata":"https://www.wikidata.org/wiki/Q423038","display_name":"Active listening","level":2,"score":0.350600004196167},{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.3391000032424927},{"id":"https://openalex.org/C170130773","wikidata":"https://www.wikidata.org/wiki/Q216378","display_name":"Usability","level":2,"score":0.3352000117301941},{"id":"https://openalex.org/C88485024","wikidata":"https://www.wikidata.org/wiki/Q1054571","display_name":"Cepstrum","level":2,"score":0.32280001044273376},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.3190000057220459},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.31189998984336853},{"id":"https://openalex.org/C70836080","wikidata":"https://www.wikidata.org/wiki/Q837940","display_name":"Impulse (physics)","level":2,"score":0.3052999973297119},{"id":"https://openalex.org/C178432105","wikidata":"https://www.wikidata.org/wiki/Q2182127","display_name":"Room acoustics","level":3,"score":0.30379998683929443},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.2985000014305115},{"id":"https://openalex.org/C2778263558","wikidata":"https://www.wikidata.org/wiki/Q46384","display_name":"Microphone","level":3,"score":0.29280000925064087},{"id":"https://openalex.org/C127220857","wikidata":"https://www.wikidata.org/wiki/Q2719318","display_name":"Audio signal processing","level":4,"score":0.29019999504089355},{"id":"https://openalex.org/C79018884","wikidata":"https://www.wikidata.org/wiki/Q622324","display_name":"Loudness","level":2,"score":0.28459998965263367},{"id":"https://openalex.org/C64922751","wikidata":"https://www.wikidata.org/wiki/Q4650799","display_name":"Audio signal","level":3,"score":0.2809000015258789},{"id":"https://openalex.org/C2776009117","wikidata":"https://www.wikidata.org/wiki/Q2305951","display_name":"Architectural acoustics","level":3,"score":0.2750999927520752},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.25859999656677246},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.25369998812675476},{"id":"https://openalex.org/C2780544925","wikidata":"https://www.wikidata.org/wiki/Q569874","display_name":"Surround sound","level":3,"score":0.25360000133514404}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/waspaa66052.2025.11230976","is_oa":false,"landing_page_url":"https://doi.org/10.1109/waspaa66052.2025.11230976","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":16,"referenced_works":["https://openalex.org/W2091855602","https://openalex.org/W2125114513","https://openalex.org/W2611709348","https://openalex.org/W3097471014","https://openalex.org/W3198454118","https://openalex.org/W3206257526","https://openalex.org/W3213211078","https://openalex.org/W3217317857","https://openalex.org/W4224314935","https://openalex.org/W4231225162","https://openalex.org/W4281264304","https://openalex.org/W4313021454","https://openalex.org/W4390872297","https://openalex.org/W4394805515","https://openalex.org/W4402111756","https://openalex.org/W4404545746"],"related_works":[],"abstract_inverted_index":{"The":[0],"generation":[1,95],"of":[2,39,100,109,130],"room":[3,63],"impulse":[4],"responses":[5],"(RIRs)":[6],"using":[7,160],"deep":[8],"neural":[9],"networks":[10],"has":[11],"attracted":[12],"growing":[13],"research":[14],"interest":[15],"due":[16],"to":[17,75,115,174,196],"its":[18,44],"applications":[19],"in":[20,59,154],"virtual":[21],"and":[22,27,47,112,120,140,149,186,191],"augmented":[23],"reality,":[24],"audio":[25],"postproduction,":[26],"related":[28],"fields.":[29],"Most":[30],"existing":[31],"approaches":[32],"condition":[33],"generative":[34,151],"models":[35,152,173,208],"on":[36,53,97],"physical":[37,83],"descriptions":[38],"a":[40,72,76,98,182,187],"room,":[41],"such":[42],"as":[43],"size,":[45],"shape,":[46],"surface":[48],"materials.":[49],"However,":[50],"this":[51,86],"reliance":[52],"geometric":[54],"information":[55],"limits":[56],"their":[57],"usability":[58],"scenarios":[60],"where":[61],"the":[62,125,155,179,206,215,219],"layout":[64],"is":[65,78],"unknown":[66],"or":[67,165,210],"when":[68],"perceptual":[69],"realism":[70],"(how":[71],"space":[73,126],"sounds":[74],"listener)":[77],"more":[79,138],"important":[80],"than":[81],"strict":[82],"accuracy.":[84],"In":[85],"study,":[87],"we":[88,169],"propose":[89],"an":[90,176],"alternative":[91],"strategy:":[92],"conditioning":[93],"RIR":[94,101,143],"directly":[96],"set":[99],"acoustic":[102],"parameters.":[103],"These":[104],"parameters":[105],"include":[106],"various":[107],"measures":[108],"reverberation":[110,116],"time":[111],"direct":[113],"sound":[114,128],"ratio,":[117],"both":[118,147],"broadband":[119],"bandwise.":[121],"By":[122],"specifying":[123],"how":[124,131],"should":[127,133],"instead":[129],"it":[132],"look,":[134],"our":[135],"method":[136],"enables":[137],"flexible":[139],"perceptually":[141],"driven":[142],"generation.":[144],"We":[145],"explore":[146],"autoregressive":[148,177],"non-autoregressive":[150],"operating":[153],"Descript":[156],"Audio":[157],"Codec":[158],"domain,":[159],"either":[161],"discrete":[162],"token":[163],"sequences":[164],"continuous":[166],"embeddings.":[167],"Specifically,":[168],"have":[170],"selected":[171],"four":[172],"evaluate:":[175],"transformer,":[178],"MaskGIT":[180,216],"model,":[181,185],"flow":[183],"matching":[184],"classifier-based":[188],"approach.":[189],"Objective":[190],"subjective":[192],"evaluations":[193],"are":[194,224],"performed":[195],"compare":[197],"these":[198],"methods":[199],"with":[200,214],"state-of-the-art":[201,212],"alternatives.":[202],"Results":[203],"show":[204],"that":[205],"proposed":[207],"match":[209],"outperform":[211],"alternatives,":[213],"model":[217],"achieving":[218],"best":[220],"performance.":[221],"Listening":[222],"examples":[223],"available":[225],"at":[226],"https://silviaarellanogarcia.github.io/rir-acoustic.":[227]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-11-14T00:00:00"}
