{"id":"https://openalex.org/W3206231676","doi":"https://doi.org/10.1145/3474085.3475555","title":"Vision-guided Music Source Separation via a Fine-grained Cycle-Separation Network","display_name":"Vision-guided Music Source Separation via a Fine-grained Cycle-Separation Network","publication_year":2021,"publication_date":"2021-10-17","ids":{"openalex":"https://openalex.org/W3206231676","doi":"https://doi.org/10.1145/3474085.3475555","mag":"3206231676"},"language":"en","primary_location":{"id":"doi:10.1145/3474085.3475555","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3474085.3475555","pdf_url":"https://dl.acm.org/action/downloadSupplement?doi=10.1145%2F3474085.3475555&file=mfp2072aux.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/action/downloadSupplement?doi=10.1145%2F3474085.3475555&file=mfp2072aux.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100622671","display_name":"Shuo Ma","orcid":"https://orcid.org/0000-0002-8324-0506"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Ma Shuo","raw_affiliation_strings":["University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068950059","display_name":"Yanli Ji","orcid":"https://orcid.org/0000-0001-9122-6141"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yanli Ji","raw_affiliation_strings":["University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009074046","display_name":"Xing Xu","orcid":"https://orcid.org/0000-0001-5685-3123"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xing Xu","raw_affiliation_strings":["University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5037340898","display_name":"Xiaofeng Zhu","orcid":"https://orcid.org/0000-0001-6840-0578"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaofeng Zhu","raw_affiliation_strings":["University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100622671"],"corresponding_institution_ids":["https://openalex.org/I150229711"],"apc_list":null,"apc_paid":null,"fwci":0.6094,"has_fulltext":true,"cited_by_count":4,"citation_normalized_percentile":{"value":0.67383367,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"4202","last_page":"4210"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10283","display_name":"Hearing Loss and Rehabilitation","score":0.9926000237464905,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.9229767322540283},{"id":"https://openalex.org/keywords/source-separation","display_name":"Source separation","score":0.8662867546081543},{"id":"https://openalex.org/keywords/separation","display_name":"Separation (statistics)","score":0.7000373601913452},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6576640605926514},{"id":"https://openalex.org/keywords/residual","display_name":"Residual","score":0.5918139815330505},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.5266473293304443},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.5174577832221985},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.45600569248199463},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.44044578075408936},{"id":"https://openalex.org/keywords/music-information-retrieval","display_name":"Music information retrieval","score":0.4290672242641449},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.34277260303497314},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.1970939338207245},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.19229274988174438},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.10392603278160095}],"concepts":[{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.9229767322540283},{"id":"https://openalex.org/C2776864781","wikidata":"https://www.wikidata.org/wiki/Q52617913","display_name":"Source separation","level":2,"score":0.8662867546081543},{"id":"https://openalex.org/C2776061190","wikidata":"https://www.wikidata.org/wiki/Q7451805","display_name":"Separation (statistics)","level":2,"score":0.7000373601913452},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6576640605926514},{"id":"https://openalex.org/C155512373","wikidata":"https://www.wikidata.org/wiki/Q287450","display_name":"Residual","level":2,"score":0.5918139815330505},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.5266473293304443},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.5174577832221985},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.45600569248199463},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.44044578075408936},{"id":"https://openalex.org/C2777946086","wikidata":"https://www.wikidata.org/wiki/Q1163335","display_name":"Music information retrieval","level":3,"score":0.4290672242641449},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.34277260303497314},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.1970939338207245},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.19229274988174438},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.10392603278160095},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0},{"id":"https://openalex.org/C558565934","wikidata":"https://www.wikidata.org/wiki/Q2743","display_name":"Musical","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3474085.3475555","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3474085.3475555","pdf_url":"https://dl.acm.org/action/downloadSupplement?doi=10.1145%2F3474085.3475555&file=mfp2072aux.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3474085.3475555","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3474085.3475555","pdf_url":"https://dl.acm.org/action/downloadSupplement?doi=10.1145%2F3474085.3475555&file=mfp2072aux.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Multimedia","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3206231676.pdf","grobid_xml":"https://content.openalex.org/works/W3206231676.grobid-xml"},"referenced_works_count":43,"referenced_works":["https://openalex.org/W639708223","https://openalex.org/W1971168548","https://openalex.org/W2000761799","https://openalex.org/W2120847449","https://openalex.org/W2143169494","https://openalex.org/W2150415460","https://openalex.org/W2194775991","https://openalex.org/W2221409856","https://openalex.org/W2408744528","https://openalex.org/W2460742184","https://openalex.org/W2587994092","https://openalex.org/W2593116425","https://openalex.org/W2619697695","https://openalex.org/W2625041691","https://openalex.org/W2795478783","https://openalex.org/W2796992393","https://openalex.org/W2797090057","https://openalex.org/W2889380691","https://openalex.org/W2900292050","https://openalex.org/W2938126400","https://openalex.org/W2951237705","https://openalex.org/W2953311167","https://openalex.org/W2962865004","https://openalex.org/W2963082324","https://openalex.org/W2963115079","https://openalex.org/W2963887950","https://openalex.org/W2964048159","https://openalex.org/W2972767900","https://openalex.org/W2981816492","https://openalex.org/W2981851635","https://openalex.org/W2982619606","https://openalex.org/W3015371781","https://openalex.org/W3017343282","https://openalex.org/W3023371261","https://openalex.org/W3024979138","https://openalex.org/W3096780661","https://openalex.org/W3108367559","https://openalex.org/W3118120400","https://openalex.org/W3121016465","https://openalex.org/W3121480429","https://openalex.org/W3123318516","https://openalex.org/W4288083516","https://openalex.org/W4289665794"],"related_works":["https://openalex.org/W3094316140","https://openalex.org/W3133205200","https://openalex.org/W2898145319","https://openalex.org/W4289363934","https://openalex.org/W2098101267","https://openalex.org/W2898606530","https://openalex.org/W4289362680","https://openalex.org/W2403380333","https://openalex.org/W2059119686","https://openalex.org/W2761596192"],"abstract_inverted_index":{"Music":[0],"source":[1,41,136],"separation":[2,77,118],"from":[3,71],"a":[4,8,32],"sound":[5],"mixture":[6],"remains":[7],"big":[9],"challenge":[10],"because":[11],"there":[12],"often":[13],"exist":[14],"heavy":[15],"overlaps":[16],"and":[17,105,116,120,132],"interactions":[18],"among":[19],"similar":[20],"music":[21,40,55,69,74,135],"signals.":[22],"In":[23],"order":[24],"to":[25],"correctly":[26],"separate":[27],"mixed":[28],"sources,":[29],"we":[30],"propose":[31],"novel":[33],"Fine-grained":[34],"Cycle-Separation":[35],"Network":[36],"(FCSN)":[37],"for":[38,126],"vision-guided":[39],"separation.":[42,137],"With":[43],"the":[44,49,59,72,83,100,103,106,128],"guidance":[45],"of":[46,130],"visual":[47],"features,":[48],"proposed":[50],"FCSN":[51],"approach":[52,109],"preliminarily":[53,67],"separated":[54,68],"sources":[56],"by":[57,65],"minimizing":[58],"residual":[60,84],"spectrogram":[61,85],"which":[62],"is":[63,78],"calculated":[64],"removing":[66],"spectrograms":[70],"original":[73],"mixture.":[75],"The":[76],"repeated":[79],"several":[80],"times":[81],"until":[82],"becomes":[86],"empty":[87],"or":[88],"leaves":[89],"only":[90],"noise.":[91],"Extensive":[92],"experiments":[93],"are":[94],"performed":[95],"on":[96],"three":[97],"large-scale":[98],"datasets,":[99,115],"MUSIC":[101],"(MUSIC-21),":[102],"AudioSet,":[104],"VGGSound.":[107],"Our":[108],"outperforms":[110],"state-of-the-art":[111],"approaches":[112],"in":[113,134],"all":[114],"both":[117],"accuracies":[119],"visualization":[121],"results":[122],"demonstrate":[123],"its":[124],"effectiveness":[125],"solving":[127],"problem":[129],"overlap":[131],"interaction":[133]},"counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
