{"id":"https://openalex.org/W3198586259","doi":"https://doi.org/10.1145/3460426.3463624","title":"Multi-Attention Audio-Visual Fusion Network for Audio Spatialization","display_name":"Multi-Attention Audio-Visual Fusion Network for Audio Spatialization","publication_year":2021,"publication_date":"2021-08-24","ids":{"openalex":"https://openalex.org/W3198586259","doi":"https://doi.org/10.1145/3460426.3463624","mag":"3198586259"},"language":"en","primary_location":{"id":"doi:10.1145/3460426.3463624","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3460426.3463624","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2021 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5018530110","display_name":"Wen Zhang","orcid":"https://orcid.org/0000-0003-4665-8142"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Wen Zhang","raw_affiliation_strings":["University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5072350518","display_name":"Jie Shao","orcid":"https://orcid.org/0000-0003-2615-1555"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jie Shao","raw_affiliation_strings":["Sichuan Artificial Intelligence Research Institute, Yibin, China"],"affiliations":[{"raw_affiliation_string":"Sichuan Artificial Intelligence Research Institute, Yibin, China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5018530110"],"corresponding_institution_ids":["https://openalex.org/I150229711"],"apc_list":null,"apc_paid":null,"fwci":0.6094,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.6703854,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"394","last_page":"401"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10688","display_name":"Image and Signal Denoising Methods","score":0.993399977684021,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.854806661605835},{"id":"https://openalex.org/keywords/spatialization","display_name":"Spatialization","score":0.7017477750778198},{"id":"https://openalex.org/keywords/stereophonic-sound","display_name":"Stereophonic sound","score":0.6211960315704346},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.5882574915885925},{"id":"https://openalex.org/keywords/audio-signal","display_name":"Audio signal","score":0.561351478099823},{"id":"https://openalex.org/keywords/fuse","display_name":"Fuse (electrical)","score":0.5602524280548096},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.557386577129364},{"id":"https://openalex.org/keywords/audio-visual","display_name":"Audio visual","score":0.5017585754394531},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.48484623432159424},{"id":"https://openalex.org/keywords/sound-quality","display_name":"Sound quality","score":0.48177191615104675},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.4752642512321472},{"id":"https://openalex.org/keywords/audio-analyzer","display_name":"Audio analyzer","score":0.43006134033203125},{"id":"https://openalex.org/keywords/audio-signal-processing","display_name":"Audio signal processing","score":0.3991740345954895},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.30430006980895996},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.256009042263031},{"id":"https://openalex.org/keywords/speech-coding","display_name":"Speech coding","score":0.17213529348373413}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.854806661605835},{"id":"https://openalex.org/C2777031145","wikidata":"https://www.wikidata.org/wiki/Q4430987","display_name":"Spatialization","level":2,"score":0.7017477750778198},{"id":"https://openalex.org/C140631703","wikidata":"https://www.wikidata.org/wiki/Q34678","display_name":"Stereophonic sound","level":3,"score":0.6211960315704346},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5882574915885925},{"id":"https://openalex.org/C64922751","wikidata":"https://www.wikidata.org/wiki/Q4650799","display_name":"Audio signal","level":3,"score":0.561351478099823},{"id":"https://openalex.org/C141353440","wikidata":"https://www.wikidata.org/wiki/Q182221","display_name":"Fuse (electrical)","level":2,"score":0.5602524280548096},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.557386577129364},{"id":"https://openalex.org/C3017588708","wikidata":"https://www.wikidata.org/wiki/Q758901","display_name":"Audio visual","level":2,"score":0.5017585754394531},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.48484623432159424},{"id":"https://openalex.org/C167310288","wikidata":"https://www.wikidata.org/wiki/Q7564808","display_name":"Sound quality","level":2,"score":0.48177191615104675},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.4752642512321472},{"id":"https://openalex.org/C160372630","wikidata":"https://www.wikidata.org/wiki/Q4819855","display_name":"Audio analyzer","level":5,"score":0.43006134033203125},{"id":"https://openalex.org/C127220857","wikidata":"https://www.wikidata.org/wiki/Q2719318","display_name":"Audio signal processing","level":4,"score":0.3991740345954895},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.30430006980895996},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.256009042263031},{"id":"https://openalex.org/C13895895","wikidata":"https://www.wikidata.org/wiki/Q3270773","display_name":"Speech coding","level":2,"score":0.17213529348373413},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C19165224","wikidata":"https://www.wikidata.org/wiki/Q23404","display_name":"Anthropology","level":1,"score":0.0},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3460426.3463624","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3460426.3463624","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2021 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4253300795","display_name":null,"funder_award_id":"61832001","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":34,"referenced_works":["https://openalex.org/W1901129140","https://openalex.org/W1977985044","https://openalex.org/W2117539524","https://openalex.org/W2143169494","https://openalex.org/W2150415460","https://openalex.org/W2194775991","https://openalex.org/W2544224704","https://openalex.org/W2608979556","https://openalex.org/W2625027024","https://openalex.org/W2626699878","https://openalex.org/W2626778328","https://openalex.org/W2660943524","https://openalex.org/W2796292145","https://openalex.org/W2796992393","https://openalex.org/W2797090057","https://openalex.org/W2797263747","https://openalex.org/W2800288142","https://openalex.org/W2889380691","https://openalex.org/W2922538097","https://openalex.org/W2938126400","https://openalex.org/W2939598386","https://openalex.org/W2950388022","https://openalex.org/W2951237705","https://openalex.org/W2952558884","https://openalex.org/W2962732076","https://openalex.org/W2964207404","https://openalex.org/W2979157532","https://openalex.org/W2981816492","https://openalex.org/W2982624843","https://openalex.org/W3034865020","https://openalex.org/W3122808968","https://openalex.org/W3123318516","https://openalex.org/W3154807520","https://openalex.org/W4289665794"],"related_works":["https://openalex.org/W1963904765","https://openalex.org/W3110950319","https://openalex.org/W51043520","https://openalex.org/W2092226312","https://openalex.org/W2063364583","https://openalex.org/W2020952589","https://openalex.org/W2792033502","https://openalex.org/W3043119899","https://openalex.org/W3081509514","https://openalex.org/W2131407535"],"abstract_inverted_index":{"In":[0,49],"our":[1],"daily":[2],"life,":[3],"we":[4,63,142],"are":[5],"exposed":[6],"to":[7,43,51,58,68,81,86,112,118,136],"a":[8,32,101],"large":[9,33],"number":[10,34],"of":[11,35,78],"video":[12,16,21,74,93,124,168],"files.":[13],"Compared":[14],"with":[15,22,27,46,61,139],"containing":[17],"only":[18],"mono":[19,70],"audio,":[20],"stereo":[23,138,155],"can":[24,149],"provide":[25],"us":[26],"better":[28],"audio-visual":[29],"experience.":[30],"However,":[31],"ordinary":[36],"users":[37,57],"do":[38],"not":[39],"have":[40],"professional":[41],"equipment":[42],"record":[44],"videos":[45,60],"high-quality":[47],"stereo.":[48,76],"order":[50,135],"make":[52],"it":[53],"more":[54],"convenient":[55],"for":[56],"obtain":[59,137],"stereo,":[62],"propose":[64],"an":[65,144],"effective":[66],"method":[67],"convert":[69],"audio":[71,97,130],"in":[72,122,134],"the":[73,79,96,109,114,119,123,153],"into":[75,95,129],"One":[77],"keys":[80],"this":[82],"task":[83],"is":[84,163],"how":[85],"effectively":[87],"inject":[88],"visual":[89],"information":[90],"extracted":[91],"from":[92],"frames":[94,125],"signal.":[98],"We":[99],"design":[100,143],"novel":[102],"multi-attention":[103],"fusion":[104],"network":[105],"(MAFNet)":[106],"based":[107],"on":[108,165],"self-attention":[110],"mechanism":[111],"extract":[113],"spatial":[115],"features":[116,131],"related":[117],"sound":[120,156],"source":[121],"and":[126,151,171,173],"fuse":[127],"them":[128],"well.":[132],"Furthermore,":[133],"higher":[140],"quality,":[141],"additional":[145],"iterative":[146],"structure":[147],"which":[148],"refine":[150],"optimize":[152],"generated":[154],"by":[157],"several":[158],"iterations.":[159],"Our":[160],"proposed":[161],"approach":[162],"validated":[164],"two":[166],"challenging":[167],"datasets":[169],"(FAIR-Play":[170],"YT-MUSIC),":[172],"achieves":[174],"new":[175],"state-of-the-art":[176],"performance.":[177]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
