{"id":"https://openalex.org/W4392903429","doi":"https://doi.org/10.1109/icassp48485.2024.10448509","title":"Binauralmusic: A Diverse Dataset for Improving Cross-Modal Binaural Audio Generation","display_name":"Binauralmusic: A Diverse Dataset for Improving Cross-Modal Binaural Audio Generation","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392903429","doi":"https://doi.org/10.1109/icassp48485.2024.10448509"},"language":"en","primary_location":{"id":"doi:10.1109/icassp48485.2024.10448509","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icassp48485.2024.10448509","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5104260041","display_name":"Yunqi Li","orcid":null},"institutions":[{"id":"https://openalex.org/I75689368","display_name":"Communication University of China","ror":"https://ror.org/04facbs33","country_code":"CN","type":"education","lineage":["https://openalex.org/I75689368"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yunqi Li","raw_affiliation_strings":["Communication University of China,School of Data Science and Intelligent Media,Beijing,China,100024"],"affiliations":[{"raw_affiliation_string":"Communication University of China,School of Data Science and Intelligent Media,Beijing,China,100024","institution_ids":["https://openalex.org/I75689368"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5106671844","display_name":"Shulin Liu","orcid":"https://orcid.org/0009-0001-9681-4177"},"institutions":[{"id":"https://openalex.org/I75689368","display_name":"Communication University of China","ror":"https://ror.org/04facbs33","country_code":"CN","type":"education","lineage":["https://openalex.org/I75689368"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shulin Liu","raw_affiliation_strings":["Communication University of China,School of Information and Communication Engineering,Beijing,China,100024"],"affiliations":[{"raw_affiliation_string":"Communication University of China,School of Information and Communication Engineering,Beijing,China,100024","institution_ids":["https://openalex.org/I75689368"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065017752","display_name":"Haonan Cheng","orcid":"https://orcid.org/0000-0003-3407-4318"},"institutions":[{"id":"https://openalex.org/I4391768176","display_name":"State Key Laboratory of Media Convergence and Communication","ror":"https://ror.org/0595ys057","country_code":null,"type":"facility","lineage":["https://openalex.org/I4391768176","https://openalex.org/I75689368"]},{"id":"https://openalex.org/I75689368","display_name":"Communication University of China","ror":"https://ror.org/04facbs33","country_code":"CN","type":"education","lineage":["https://openalex.org/I75689368"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haonan Cheng","raw_affiliation_strings":["Communication University of China,State Key Laboratory of Media Convergence and Communication,Beijing,China,100024"],"affiliations":[{"raw_affiliation_string":"Communication University of China,State Key Laboratory of Media Convergence and Communication,Beijing,China,100024","institution_ids":["https://openalex.org/I75689368","https://openalex.org/I4391768176"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100600271","display_name":"Long Ye","orcid":"https://orcid.org/0000-0002-3562-5612"},"institutions":[{"id":"https://openalex.org/I4391768176","display_name":"State Key Laboratory of Media Convergence and Communication","ror":"https://ror.org/0595ys057","country_code":null,"type":"facility","lineage":["https://openalex.org/I4391768176","https://openalex.org/I75689368"]},{"id":"https://openalex.org/I75689368","display_name":"Communication University of China","ror":"https://ror.org/04facbs33","country_code":"CN","type":"education","lineage":["https://openalex.org/I75689368"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Long Ye","raw_affiliation_strings":["Communication University of China,State Key Laboratory of Media Convergence and Communication,Beijing,China,100024"],"affiliations":[{"raw_affiliation_string":"Communication University of China,State Key Laboratory of Media Convergence and Communication,Beijing,China,100024","institution_ids":["https://openalex.org/I75689368","https://openalex.org/I4391768176"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5104260041"],"corresponding_institution_ids":["https://openalex.org/I75689368"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.02739073,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"31","issue":null,"first_page":"7990","last_page":"7994"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10283","display_name":"Hearing Loss and Rehabilitation","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/binaural-recording","display_name":"Binaural recording","score":0.8433802127838135},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7493386268615723},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.5614564418792725},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5590876340866089},{"id":"https://openalex.org/keywords/audio-feedback","display_name":"Audio feedback","score":0.44365808367729187},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.43839162588119507},{"id":"https://openalex.org/keywords/microphone","display_name":"Microphone","score":0.422887921333313},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.3570104241371155},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.16317975521087646},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.11118736863136292},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.08177003264427185}],"concepts":[{"id":"https://openalex.org/C201247586","wikidata":"https://www.wikidata.org/wiki/Q5612967","display_name":"Binaural recording","level":2,"score":0.8433802127838135},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7493386268615723},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.5614564418792725},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5590876340866089},{"id":"https://openalex.org/C38956757","wikidata":"https://www.wikidata.org/wiki/Q716215","display_name":"Audio feedback","level":2,"score":0.44365808367729187},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.43839162588119507},{"id":"https://openalex.org/C2778263558","wikidata":"https://www.wikidata.org/wiki/Q46384","display_name":"Microphone","level":3,"score":0.422887921333313},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.3570104241371155},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.16317975521087646},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.11118736863136292},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.08177003264427185},{"id":"https://openalex.org/C68115822","wikidata":"https://www.wikidata.org/wiki/Q1068172","display_name":"Sound pressure","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp48485.2024.10448509","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icassp48485.2024.10448509","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G104419513","display_name":null,"funder_award_id":"61971383","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G1231421488","display_name":null,"funder_award_id":"under","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2376276132","display_name":null,"funder_award_id":"China","funder_id":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities"},{"id":"https://openalex.org/G3139283728","display_name":null,"funder_award_id":"62201524","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3317480652","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G344453458","display_name":null,"funder_award_id":"201524","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G391238517","display_name":null,"funder_award_id":", and","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4317426937","display_name":null,"funder_award_id":"CUC23GZ016","funder_id":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities"},{"id":"https://openalex.org/G5994120800","display_name":null,"funder_award_id":"Natural","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6058138561","display_name":null,"funder_award_id":", No.","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6723765259","display_name":null,"funder_award_id":"61971383","funder_id":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities"},{"id":"https://openalex.org/G6938695607","display_name":null,"funder_award_id":"62271455","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335787","display_name":"Fundamental Research Funds for the Central Universities","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W584173323","https://openalex.org/W639708223","https://openalex.org/W1979654135","https://openalex.org/W2110007838","https://openalex.org/W2186222003","https://openalex.org/W2407685581","https://openalex.org/W2759171953","https://openalex.org/W2890855852","https://openalex.org/W2950388022","https://openalex.org/W2982624843","https://openalex.org/W3043163188","https://openalex.org/W3101943858","https://openalex.org/W3169318522","https://openalex.org/W3174854700","https://openalex.org/W3202387331","https://openalex.org/W3213191779","https://openalex.org/W4205278059","https://openalex.org/W4384078665"],"related_works":["https://openalex.org/W2766995619","https://openalex.org/W4224270619","https://openalex.org/W2579722767","https://openalex.org/W2168148781","https://openalex.org/W1991848873","https://openalex.org/W107154053","https://openalex.org/W2651073586","https://openalex.org/W2783391885","https://openalex.org/W2773081779","https://openalex.org/W2733029664"],"abstract_inverted_index":{"Cross-modal":[0],"binaural":[1,28,39,52,66,106],"audio":[2,40,53,67,107],"generation":[3,41,108],"is":[4],"an":[5],"important":[6],"task":[7],"and":[8,17,96,146],"has":[9],"broad":[10],"applications":[11],"such":[12,80,92],"as":[13,81,86,88,93,121],"game":[14],"sound":[15,143],"development":[16],"auditory":[18],"assistance":[19],"for":[20],"the":[21,101,104,118,129],"visually":[22,141],"impaired.":[23],"However,":[24],"existing":[25],"datasets":[26],"lack":[27],"samples":[29],"with":[30,65],"abundant":[31],"visual":[32],"venues.":[33],"As":[34],"a":[35],"consequence,":[36],"state-of-the-art":[37],"cross-modal":[38,105,137],"methods":[42],"have":[43],"weak":[44],"generalization.":[45],"To":[46],"support":[47,134],"research":[48],"on":[49],"building":[50],"robust":[51],"generation,":[54],"we":[55],"construct":[56],"BinauralMusic":[57,119,130],"dataset":[58,120,131],"consisting":[59],"of":[60,103],"5,462":[61],"performance":[62,74,102],"video":[63],"clips":[64],"from":[68,126],"9":[69],"musical":[70],"instrument":[71],"categories.":[72],"The":[73],"venues":[75],"involve":[76],"indoor":[77],"closed":[78],"places":[79],"shopping":[82],"mall,":[83],"hotel,":[84],"bedroom,":[85],"well":[87],"outdoor":[89],"open":[90],"areas":[91],"field,":[94],"garden":[95],"seashore.":[97],"Experiments":[98],"show":[99],"that":[100],"model":[109],"can":[110,132],"be":[111],"significantly":[112],"improved":[113],"by":[114,116],"10.62%":[115],"using":[117],"training":[122],"material.":[123],"Moreover,":[124],"different":[125],"previous":[127],"datasets,":[128],"also":[133],"other":[135],"audio-visual":[136],"learning":[138],"tasks,":[139],"including":[140],"guided":[142],"source":[144],"localization":[145],"separation.":[147]},"counts_by_year":[],"updated_date":"2026-04-16T08:26:57.006410","created_date":"2025-10-10T00:00:00"}
