{"id":"https://openalex.org/W4313048473","doi":"https://doi.org/10.1109/ijcnn55064.2022.9892951","title":"Interpretable Binaural Ratio for Visually Guided Binaural Audio Generation","display_name":"Interpretable Binaural Ratio for Visually Guided Binaural Audio Generation","publication_year":2022,"publication_date":"2022-07-18","ids":{"openalex":"https://openalex.org/W4313048473","doi":"https://doi.org/10.1109/ijcnn55064.2022.9892951"},"language":"en","primary_location":{"id":"doi:10.1109/ijcnn55064.2022.9892951","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn55064.2022.9892951","pdf_url":null,"source":{"id":"https://openalex.org/S4363607707","display_name":"2022 International Joint Conference on Neural Networks (IJCNN)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102009652","display_name":"Tao Zheng","orcid":"https://orcid.org/0000-0001-5035-6214"},"institutions":[{"id":"https://openalex.org/I4210108985","display_name":"Bellevue Hospital Center","ror":"https://ror.org/01ky34z31","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1283621791","https://openalex.org/I4210086933","https://openalex.org/I4210108985"]},{"id":"https://openalex.org/I114017466","display_name":"University of Technology Sydney","ror":"https://ror.org/03f0f6041","country_code":"AU","type":"education","lineage":["https://openalex.org/I114017466"]}],"countries":["AU","US"],"is_corresponding":true,"raw_author_name":"Tao Zheng","raw_affiliation_strings":["University of Technology Sydney,Australia","University of Technology Sydney, Australia","Tencent AI Lab, Bellevue, USA"],"affiliations":[{"raw_affiliation_string":"University of Technology Sydney,Australia","institution_ids":["https://openalex.org/I114017466"]},{"raw_affiliation_string":"University of Technology Sydney, Australia","institution_ids":["https://openalex.org/I114017466"]},{"raw_affiliation_string":"Tencent AI Lab, Bellevue, USA","institution_ids":["https://openalex.org/I4210108985"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107747167","display_name":"Sunny Verma","orcid":"https://orcid.org/0000-0002-6768-4705"},"institutions":[{"id":"https://openalex.org/I99043593","display_name":"Macquarie University","ror":"https://ror.org/01sf06y89","country_code":"AU","type":"education","lineage":["https://openalex.org/I99043593"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Sunny Verma","raw_affiliation_strings":["Macquarie University,Sydney,Australia","Macquarie University, Sydney, Australia"],"affiliations":[{"raw_affiliation_string":"Macquarie University,Sydney,Australia","institution_ids":["https://openalex.org/I99043593"]},{"raw_affiliation_string":"Macquarie University, Sydney, Australia","institution_ids":["https://openalex.org/I99043593"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100431757","display_name":"Wei Liu","orcid":"https://orcid.org/0000-0002-2187-8125"},"institutions":[{"id":"https://openalex.org/I114017466","display_name":"University of Technology Sydney","ror":"https://ror.org/03f0f6041","country_code":"AU","type":"education","lineage":["https://openalex.org/I114017466"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Wei Liu","raw_affiliation_strings":["University of Technology Sydney,Australia","University of Technology Sydney, Australia"],"affiliations":[{"raw_affiliation_string":"University of Technology Sydney,Australia","institution_ids":["https://openalex.org/I114017466"]},{"raw_affiliation_string":"University of Technology Sydney, Australia","institution_ids":["https://openalex.org/I114017466"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5102009652"],"corresponding_institution_ids":["https://openalex.org/I114017466","https://openalex.org/I4210108985"],"apc_list":null,"apc_paid":null,"fwci":0.3682,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.54439179,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.991100013256073,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.991100013256073,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9904000163078308,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10283","display_name":"Hearing Loss and Rehabilitation","score":0.9761000275611877,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/binaural-recording","display_name":"Binaural recording","score":0.912007212638855},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7666410207748413},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4833788275718689},{"id":"https://openalex.org/keywords/ground-truth","display_name":"Ground truth","score":0.4831801950931549},{"id":"https://openalex.org/keywords/limiting","display_name":"Limiting","score":0.41912755370140076},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.38673293590545654}],"concepts":[{"id":"https://openalex.org/C201247586","wikidata":"https://www.wikidata.org/wiki/Q5612967","display_name":"Binaural recording","level":2,"score":0.912007212638855},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7666410207748413},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4833788275718689},{"id":"https://openalex.org/C146849305","wikidata":"https://www.wikidata.org/wiki/Q370766","display_name":"Ground truth","level":2,"score":0.4831801950931549},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.41912755370140076},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.38673293590545654},{"id":"https://openalex.org/C78519656","wikidata":"https://www.wikidata.org/wiki/Q101333","display_name":"Mechanical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn55064.2022.9892951","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn55064.2022.9892951","pdf_url":null,"source":{"id":"https://openalex.org/S4363607707","display_name":"2022 International Joint Conference on Neural Networks (IJCNN)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5799999833106995,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W343636949","https://openalex.org/W1506406817","https://openalex.org/W1901129140","https://openalex.org/W2101298784","https://openalex.org/W2194775991","https://openalex.org/W2261427925","https://openalex.org/W2291877678","https://openalex.org/W2318686747","https://openalex.org/W2326925005","https://openalex.org/W2619697695","https://openalex.org/W2763307249","https://openalex.org/W2763753804","https://openalex.org/W2784500888","https://openalex.org/W2801939025","https://openalex.org/W2948012107","https://openalex.org/W2950388022","https://openalex.org/W2962960500","https://openalex.org/W2963066677","https://openalex.org/W2963807156","https://openalex.org/W2963902314","https://openalex.org/W2964345931","https://openalex.org/W3035524453","https://openalex.org/W3048939150","https://openalex.org/W3096780661","https://openalex.org/W3099638501","https://openalex.org/W3138953166","https://openalex.org/W3174854700","https://openalex.org/W4293665662","https://openalex.org/W6611801654","https://openalex.org/W6630452952","https://openalex.org/W6639824700","https://openalex.org/W6692535039","https://openalex.org/W6701655646","https://openalex.org/W6729831399","https://openalex.org/W6745688385","https://openalex.org/W6754782314","https://openalex.org/W6757240503","https://openalex.org/W6763416564","https://openalex.org/W6792340124"],"related_works":["https://openalex.org/W2766995619","https://openalex.org/W4224270619","https://openalex.org/W2579722767","https://openalex.org/W2168148781","https://openalex.org/W1991848873","https://openalex.org/W2026165661","https://openalex.org/W2099651033","https://openalex.org/W2783525307","https://openalex.org/W2593008828","https://openalex.org/W2155323221"],"abstract_inverted_index":{"Video":[0],"and":[1,6,54,83,97,118,133,144,196],"audio":[2,69,90,136],"streams":[3],"are":[4],"essential":[5],"mutually":[7],"complementary":[8],"in":[9,43,193],"multimedia":[10,26],"immersive":[11],"application":[12,24],"scenarios.":[13],"Recent":[14],"studies":[15],"have":[16],"explored":[17],"the":[18,40,44,57,62,67,113,124,127,139,158,163,178,190],"field":[19],"of":[20,32,138,142],"deep":[21],"neural":[22],"net-work":[23],"on":[25],"production,":[27],"e.g.,":[28],"visually":[29,87],"guided":[30,88],"generation":[31],"binaural":[33,89,109],"audio,":[34,146],"where":[35],"Difference":[36,101,116,121],"Mask":[37],"(DM)":[38],"is":[39,51,162],"predominant":[41],"strategy":[42,50,99],"state-of-the-art":[45],"(SOTA)":[46],"work.":[47],"However,":[48],"this":[49],"not":[52],"interpretable":[53,82],"requires":[55],"adding":[56],"ground":[58,148],"truth":[59,149],"output":[60],"as":[61],"input,":[63],"limiting":[64],"applicability.":[65],"Besides,":[66],"generated":[68],"has":[70],"a":[71,95],"relatively":[72],"low":[73,159],"spatial":[74,160],"sensation.":[75],"This":[76],"paper":[77],"aims":[78],"to":[79,86,106,112,176],"develop":[80],"an":[81],"robust":[84],"approach":[85,187],"generation.":[91],"Specifically,":[92],"we":[93,152,168],"generalize":[94],"concept":[96],"new":[98,125,173],"from":[100],"Mask,":[102],"named":[103],"Binaural":[104,179],"Ratio,":[105],"interpret":[107],"its":[108],"property":[110],"relevant":[111],"Inter-aural":[114,119],"Time":[115],"(ITD)":[117],"Level":[120],"(ILD).":[122],"In":[123],"strategy,":[126],"model":[128],"input":[129],"can":[130],"be":[131],"natural":[132],"arbitrary":[134],"mono":[135],"instead":[137],"direct":[140],"sum":[141],"left":[143],"right":[145],"i.e.,":[147],"output.":[150],"Moreover,":[151],"identify":[153],"that":[154,184],"one":[155],"reason":[156],"for":[157],"sensation":[161],"bias":[164],"toward":[165],"mono.":[166],"Thus,":[167],"tackle":[169],"it":[170],"by":[171],"designing":[172],"network":[174],"variants":[175],"learn":[177],"Ratio":[180],"robustly.":[181],"Experiments":[182],"show":[183],"our":[185],"proposed":[186],"significantly":[188],"outperforms":[189],"SOTA":[191],"methods":[192],"both":[194],"objective":[195],"subjective":[197],"evaluation":[198],"metrics.":[199]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
