{"id":"https://openalex.org/W4388821024","doi":"https://doi.org/10.1109/apsipaasc58517.2023.10317504","title":"Investigating the Role of Human Action Detector in Visual-guide Audio Source Separation System","display_name":"Investigating the Role of Human Action Detector in Visual-guide Audio Source Separation System","publication_year":2023,"publication_date":"2023-10-31","ids":{"openalex":"https://openalex.org/W4388821024","doi":"https://doi.org/10.1109/apsipaasc58517.2023.10317504"},"language":"en","primary_location":{"id":"doi:10.1109/apsipaasc58517.2023.10317504","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/apsipaasc58517.2023.10317504","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 Asia Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5003217635","display_name":"Thanh Thi Duong","orcid":"https://orcid.org/0000-0001-5854-5845"},"institutions":[{"id":"https://openalex.org/I29199639","display_name":"Hanoi University of Mining and Geology","ror":"https://ror.org/01rw3qm79","country_code":"VN","type":"education","lineage":["https://openalex.org/I29199639"]}],"countries":["VN"],"is_corresponding":true,"raw_author_name":"Thanh Thi-Hien Duong","raw_affiliation_strings":["Hanoi University of Mining and Geology,Dept. Information Technology,Vietnam","Dept. Information Technology, Hanoi University of Mining and Geology, Vietnam"],"affiliations":[{"raw_affiliation_string":"Hanoi University of Mining and Geology,Dept. Information Technology,Vietnam","institution_ids":["https://openalex.org/I29199639"]},{"raw_affiliation_string":"Dept. Information Technology, Hanoi University of Mining and Geology, Vietnam","institution_ids":["https://openalex.org/I29199639"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003658949","display_name":"Trung-Hieu Nguyen","orcid":null},"institutions":[{"id":"https://openalex.org/I94518387","display_name":"Hanoi University of Science and Technology","ror":"https://ror.org/04nyv3z04","country_code":"VN","type":"education","lineage":["https://openalex.org/I94518387"]}],"countries":["VN"],"is_corresponding":false,"raw_author_name":"Trung-Hieu Nguyen","raw_affiliation_strings":["Hanoi University of Science and Technology,School of Information Technology,Vietnam","School of Information Technology, Hanoi University of Science and Technology, Vietnam"],"affiliations":[{"raw_affiliation_string":"Hanoi University of Science and Technology,School of Information Technology,Vietnam","institution_ids":["https://openalex.org/I94518387"]},{"raw_affiliation_string":"School of Information Technology, Hanoi University of Science and Technology, Vietnam","institution_ids":["https://openalex.org/I94518387"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101269024","display_name":"The Thanh-Dat Le","orcid":null},"institutions":[{"id":"https://openalex.org/I94518387","display_name":"Hanoi University of Science and Technology","ror":"https://ror.org/04nyv3z04","country_code":"VN","type":"education","lineage":["https://openalex.org/I94518387"]}],"countries":["VN"],"is_corresponding":false,"raw_author_name":"The Thanh-Dat Le","raw_affiliation_strings":["Hanoi University of Science and Technology,Computer Vision Department, MICA,Vietnam","Hanoi University of Science and Technology,School of Information Technology,Vietnam","School of Information Technology, Hanoi University of Science and Technology, Vietnam"],"affiliations":[{"raw_affiliation_string":"Hanoi University of Science and Technology,Computer Vision Department, MICA,Vietnam","institution_ids":["https://openalex.org/I94518387"]},{"raw_affiliation_string":"Hanoi University of Science and Technology,School of Information Technology,Vietnam","institution_ids":["https://openalex.org/I94518387"]},{"raw_affiliation_string":"School of Information Technology, Hanoi University of Science and Technology, Vietnam","institution_ids":["https://openalex.org/I94518387"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052837369","display_name":"Thi-Lich Nghiem","orcid":"https://orcid.org/0000-0002-0378-4977"},"institutions":[{"id":"https://openalex.org/I94518387","display_name":"Hanoi University of Science and Technology","ror":"https://ror.org/04nyv3z04","country_code":"VN","type":"education","lineage":["https://openalex.org/I94518387"]},{"id":"https://openalex.org/I4210109591","display_name":"Thuongmai University","ror":"https://ror.org/021s58p89","country_code":"VN","type":"education","lineage":["https://openalex.org/I4210109591"]}],"countries":["VN"],"is_corresponding":false,"raw_author_name":"Thi-Lich Nghiem","raw_affiliation_strings":["Thuongmai University,Vietnam","Thuongmai University, Vietnam","Computer Vision Department, MICA, Hanoi University of Science and Technology, Vietnam"],"affiliations":[{"raw_affiliation_string":"Thuongmai University,Vietnam","institution_ids":["https://openalex.org/I4210109591"]},{"raw_affiliation_string":"Thuongmai University, Vietnam","institution_ids":["https://openalex.org/I4210109591"]},{"raw_affiliation_string":"Computer Vision Department, MICA, Hanoi University of Science and Technology, Vietnam","institution_ids":["https://openalex.org/I94518387"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069820449","display_name":"Duc-Huy Pham","orcid":"https://orcid.org/0000-0002-5707-4137"},"institutions":[{"id":"https://openalex.org/I94518387","display_name":"Hanoi University of Science and Technology","ror":"https://ror.org/04nyv3z04","country_code":"VN","type":"education","lineage":["https://openalex.org/I94518387"]}],"countries":["VN"],"is_corresponding":false,"raw_author_name":"Duc-Huy Pham","raw_affiliation_strings":["Hanoi University of Science and Technology,School of Information Technology,Vietnam","School of Information Technology, Hanoi University of Science and Technology, Vietnam"],"affiliations":[{"raw_affiliation_string":"Hanoi University of Science and Technology,School of Information Technology,Vietnam","institution_ids":["https://openalex.org/I94518387"]},{"raw_affiliation_string":"School of Information Technology, Hanoi University of Science and Technology, Vietnam","institution_ids":["https://openalex.org/I94518387"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5067859504","display_name":"Thi\u2010Lan Le","orcid":"https://orcid.org/0000-0001-9541-3905"},"institutions":[{"id":"https://openalex.org/I94518387","display_name":"Hanoi University of Science and Technology","ror":"https://ror.org/04nyv3z04","country_code":"VN","type":"education","lineage":["https://openalex.org/I94518387"]}],"countries":["VN"],"is_corresponding":false,"raw_author_name":"Thi-Lan Le","raw_affiliation_strings":["Hanoi University of Science and Technology,Computer Vision Department, MICA,Vietnam","Hanoi University of Science and Technology,School of Information Technology,Vietnam","School of Electrical and Electronic Engineering, Hanoi University of Science and Technology, Vietnam","Computer Vision Department, MICA, Hanoi University of Science and Technology, Vietnam"],"affiliations":[{"raw_affiliation_string":"Hanoi University of Science and Technology,Computer Vision Department, MICA,Vietnam","institution_ids":["https://openalex.org/I94518387"]},{"raw_affiliation_string":"Hanoi University of Science and Technology,School of Information Technology,Vietnam","institution_ids":["https://openalex.org/I94518387"]},{"raw_affiliation_string":"School of Electrical and Electronic Engineering, Hanoi University of Science and Technology, Vietnam","institution_ids":["https://openalex.org/I94518387"]},{"raw_affiliation_string":"Computer Vision Department, MICA, Hanoi University of Science and Technology, Vietnam","institution_ids":["https://openalex.org/I94518387"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5003217635"],"corresponding_institution_ids":["https://openalex.org/I29199639"],"apc_list":null,"apc_paid":null,"fwci":0.2033,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.47224135,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"1296","last_page":"1303"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/audio-visual","display_name":"Audio visual","score":0.7476294636726379},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6433866024017334},{"id":"https://openalex.org/keywords/detector","display_name":"Detector","score":0.6416853070259094},{"id":"https://openalex.org/keywords/source-separation","display_name":"Source separation","score":0.5793325304985046},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.5737490653991699},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3545742630958557},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.18286943435668945},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.1170821487903595},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.07451421022415161}],"concepts":[{"id":"https://openalex.org/C3017588708","wikidata":"https://www.wikidata.org/wiki/Q758901","display_name":"Audio visual","level":2,"score":0.7476294636726379},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6433866024017334},{"id":"https://openalex.org/C94915269","wikidata":"https://www.wikidata.org/wiki/Q1834857","display_name":"Detector","level":2,"score":0.6416853070259094},{"id":"https://openalex.org/C2776864781","wikidata":"https://www.wikidata.org/wiki/Q52617913","display_name":"Source separation","level":2,"score":0.5793325304985046},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.5737490653991699},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3545742630958557},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.18286943435668945},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.1170821487903595},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.07451421022415161},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/apsipaasc58517.2023.10317504","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/apsipaasc58517.2023.10317504","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 Asia Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320311649","display_name":"Ministry of Education","ror":"https://ror.org/036nq5137"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W1591261915","https://openalex.org/W2031583051","https://openalex.org/W2046233597","https://openalex.org/W2127851351","https://openalex.org/W2150415460","https://openalex.org/W2285479626","https://openalex.org/W2619697695","https://openalex.org/W2756203131","https://openalex.org/W2780124704","https://openalex.org/W2891508564","https://openalex.org/W2952218014","https://openalex.org/W2962865004","https://openalex.org/W2963781481","https://openalex.org/W2984935418","https://openalex.org/W2988200020","https://openalex.org/W3015201698","https://openalex.org/W3017343282","https://openalex.org/W3096431533","https://openalex.org/W3099330747","https://openalex.org/W3102619627","https://openalex.org/W3103720336","https://openalex.org/W3104704316","https://openalex.org/W3116298410","https://openalex.org/W3182657421","https://openalex.org/W3210206437","https://openalex.org/W4213061196","https://openalex.org/W4289665794","https://openalex.org/W4312095965","https://openalex.org/W6784429643"],"related_works":["https://openalex.org/W2271369634","https://openalex.org/W3147472394","https://openalex.org/W2047100085","https://openalex.org/W2350550760","https://openalex.org/W578794879","https://openalex.org/W2625296515","https://openalex.org/W3137890128","https://openalex.org/W1984634519","https://openalex.org/W4245955731","https://openalex.org/W2393726419"],"abstract_inverted_index":{"Visual-guided":[0],"Audio":[1],"Source":[2],"Separation":[3],"(VASS)":[4],"is":[5,115],"the":[6,17,21,26,57,68,72,79,96,100,104,107,137,148,157,180,188],"task":[7],"that":[8],"deals":[9],"with":[10,117,187],"using":[11],"available":[12],"visual":[13,52,62,121],"information":[14,34],"to":[15,66,77,93,102,135,155],"guide":[16],"audio":[18,54,60,69,73,90,126,170],"separation":[19,75,80,91,190],"from":[20,175],"mixture":[22],"signal":[23],"consisting":[24],"of":[25,28,38,106,159,182],"sounds":[27,109],"many":[29],"simultaneous":[30],"sound":[31,39],"sources.":[32],"Visual":[33],"can":[35,63],"be":[36,64],"images":[37],"sources":[40],"(e.g.,":[41,49],"musical":[42,152],"instruments)":[43],"or":[44],"human":[45,160],"gestures":[46],"and":[47,56,61,125,132,163,166],"activities":[48],"musicians).":[50],"The":[51,112],"features,":[53,55],"correlation":[58],"between":[59],"used":[65],"estimate":[67,136],"mask":[70],"in":[71,169],"source":[74],"model":[76],"improve":[78],"performance.":[81],"In":[82],"this":[83],"study,":[84],"we":[85],"introduce":[86],"a":[87],"new":[88],"multi-modal":[89],"framework":[92,114],"jointly":[94],"train":[95],"processing":[97],"blocks":[98],"helping":[99],"network":[101],"find":[103],"features":[105,134],"target":[108],"more":[110],"optimally.":[111],"proposed":[113],"introduced":[116],"three":[118],"main":[119],"blocks:":[120],"extractor,":[122,124],"action":[123,185],"separator.":[127],"They":[128],"combine":[129],"visual,":[130],"action,":[131],"spectral":[133,140],"separated":[138],"sounds\u2019":[139],"masks.":[141],"Extensive":[142],"experiments":[143],"have":[144,178],"been":[145],"conducted":[146],"on":[147],"MUSIC":[149],"dataset":[150],"containing":[151],"performance":[153],"videos":[154],"evaluate":[156],"quality":[158],"joint":[161],"estimation":[162],"gesture":[164],"representation":[165],"their":[167],"role":[168],"separation.":[171],"Experimental":[172],"results":[173],"obtained":[174],"different":[176],"settings":[177],"confirmed":[179],"effectiveness":[181],"combining":[183],"an":[184],"extractor":[186],"audio-visual":[189],"model.":[191]},"counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2025-12-21T23:12:01.093139","created_date":"2025-10-10T00:00:00"}
