{"id":"https://openalex.org/W4391307457","doi":"https://doi.org/10.1109/icspcc59353.2023.10400336","title":"A Multimodal Target Speech Extraction Algorithm Based on Long Short Term Attention Mechanism","display_name":"A Multimodal Target Speech Extraction Algorithm Based on Long Short Term Attention Mechanism","publication_year":2023,"publication_date":"2023-11-14","ids":{"openalex":"https://openalex.org/W4391307457","doi":"https://doi.org/10.1109/icspcc59353.2023.10400336"},"language":"en","primary_location":{"id":"doi:10.1109/icspcc59353.2023.10400336","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icspcc59353.2023.10400336","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE International Conference on Signal Processing, Communications and Computing (ICSPCC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100733780","display_name":"Yue Li","orcid":"https://orcid.org/0000-0002-7334-3712"},"institutions":[{"id":"https://openalex.org/I37796252","display_name":"Beijing University of Technology","ror":"https://ror.org/037b1pp87","country_code":"CN","type":"education","lineage":["https://openalex.org/I37796252"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yue Li","raw_affiliation_strings":["Beijing University of Technology,Facult of Information Technology,Department of Information and Communication Engineering,Beijing,China","Department of Information and Communication Engineering, Facult of Information Technology, Beijing University of Technology, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beijing University of Technology,Facult of Information Technology,Department of Information and Communication Engineering,Beijing,China","institution_ids":["https://openalex.org/I37796252"]},{"raw_affiliation_string":"Department of Information and Communication Engineering, Facult of Information Technology, Beijing University of Technology, Beijing, China","institution_ids":["https://openalex.org/I37796252"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040147385","display_name":"Ruwei Li","orcid":"https://orcid.org/0000-0002-7828-2242"},"institutions":[{"id":"https://openalex.org/I37796252","display_name":"Beijing University of Technology","ror":"https://ror.org/037b1pp87","country_code":"CN","type":"education","lineage":["https://openalex.org/I37796252"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ruwei Li","raw_affiliation_strings":["Beijing University of Technology,Facult of Information Technology,Department of Information and Communication Engineering,Beijing,China","Department of Information and Communication Engineering, Facult of Information Technology, Beijing University of Technology, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beijing University of Technology,Facult of Information Technology,Department of Information and Communication Engineering,Beijing,China","institution_ids":["https://openalex.org/I37796252"]},{"raw_affiliation_string":"Department of Information and Communication Engineering, Facult of Information Technology, Beijing University of Technology, Beijing, China","institution_ids":["https://openalex.org/I37796252"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100404494","display_name":"Man Li","orcid":"https://orcid.org/0000-0002-7545-2541"},"institutions":[{"id":"https://openalex.org/I37796252","display_name":"Beijing University of Technology","ror":"https://ror.org/037b1pp87","country_code":"CN","type":"education","lineage":["https://openalex.org/I37796252"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Man Li","raw_affiliation_strings":["Beijing University of Technology,Facult of Information Technology,Department of Information and Communication Engineering,Beijing,China","Department of Information and Communication Engineering, Facult of Information Technology, Beijing University of Technology, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beijing University of Technology,Facult of Information Technology,Department of Information and Communication Engineering,Beijing,China","institution_ids":["https://openalex.org/I37796252"]},{"raw_affiliation_string":"Department of Information and Communication Engineering, Facult of Information Technology, Beijing University of Technology, Beijing, China","institution_ids":["https://openalex.org/I37796252"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5100733780"],"corresponding_institution_ids":["https://openalex.org/I37796252"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.19689822,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"30","issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9520000219345093,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9520000219345093,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9484000205993652,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7678704261779785},{"id":"https://openalex.org/keywords/term","display_name":"Term (time)","score":0.7194777727127075},{"id":"https://openalex.org/keywords/mechanism","display_name":"Mechanism (biology)","score":0.5769216418266296},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4435375928878784},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3768622875213623},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.37311244010925293}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7678704261779785},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.7194777727127075},{"id":"https://openalex.org/C89611455","wikidata":"https://www.wikidata.org/wiki/Q6804646","display_name":"Mechanism (biology)","level":2,"score":0.5769216418266296},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4435375928878784},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3768622875213623},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.37311244010925293},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icspcc59353.2023.10400336","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icspcc59353.2023.10400336","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE International Conference on Signal Processing, Communications and Computing (ICSPCC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W335832833","https://openalex.org/W1522301498","https://openalex.org/W1991139021","https://openalex.org/W2014621385","https://openalex.org/W2081144555","https://openalex.org/W2136372468","https://openalex.org/W2563356726","https://openalex.org/W2767290858","https://openalex.org/W2886232760","https://openalex.org/W2952218014","https://openalex.org/W2964058413","https://openalex.org/W2964171275","https://openalex.org/W2964207404","https://openalex.org/W3008400075","https://openalex.org/W3035570025","https://openalex.org/W3099330747","https://openalex.org/W3163287738","https://openalex.org/W3163652268","https://openalex.org/W3182657421","https://openalex.org/W4232747912","https://openalex.org/W4289665794","https://openalex.org/W4385245566","https://openalex.org/W6631190155","https://openalex.org/W6739901393"],"related_works":["https://openalex.org/W2051487156","https://openalex.org/W2073681303","https://openalex.org/W2382997850","https://openalex.org/W2390968135","https://openalex.org/W2382213751","https://openalex.org/W2351750670","https://openalex.org/W2544423928","https://openalex.org/W1597848696","https://openalex.org/W2354715126","https://openalex.org/W2388563748"],"abstract_inverted_index":{"Most":[0],"of":[1,21,24,59,75,153],"the":[2,57,78,82,87,92,97,105,114,121,125,138,144],"existing":[3],"audio-video":[4],"fusion":[5],"mechanisms":[6],"are":[7,63,94],"fused":[8],"by":[9,65,96],"directly":[10],"splicing":[11],"or":[12],"summing":[13],"audio-visual":[14],"cues,":[15],"which":[16],"cannot":[17],"make":[18],"full":[19],"use":[20],"both":[22],"kinds":[23],"information":[25],"to":[26,31,123],"work":[27],"together.":[28],"In":[29,53],"order":[30],"solve":[32],"this":[33,51,54],"problem,":[34],"a":[35],"multimodal":[36],"target":[37,107,115,127],"speech":[38,108,116,154],"extraction":[39],"algorithm":[40],"based":[41],"on":[42,77],"long":[43,88,98],"short":[44,83,99],"term":[45,84,89,100],"attention":[46,101],"mechanism":[47,102],"is":[48,111],"proposed":[49,139],"in":[50,143,161],"paper.":[52],"algorithm,":[55],"firstly,":[56],"features":[58],"audio":[60],"and":[61,69,86,118,150,157],"lip":[62],"extracted":[64],"convolutional":[66],"neural":[67],"network":[68],"chunked":[70],"with":[71,113,135],"an":[72],"overlap":[73],"factor":[74],"50%":[76],"time":[79],"axis.":[80],"Secondly,":[81],"correlation":[85,90],"between":[91],"sequences":[93],"calculated":[95],"(LSTA).":[103],"Finally,":[104],"obtained":[106],"mask":[109],"sequence":[110,117],"multiplied":[112],"passed":[119],"through":[120],"decoder":[122],"obtain":[124],"estimated":[126],"speaker":[128],"speech.":[129],"Experimental":[130],"results":[131],"show":[132],"that":[133],"compared":[134],"contrast":[136],"algorithms,":[137],"method":[140],"performs":[141],"better":[142],"scale-invariant":[145],"signal-to-noise":[146],"ratio":[147],"improvement":[148,160],"(SI-SNRi)":[149],"perceptual":[151],"evaluation":[152],"quality":[155],"(PESQ),":[156],"achieves":[158],"consistent":[159],"cross-dataset":[162],"evaluation.":[163]},"counts_by_year":[],"updated_date":"2025-12-21T01:58:51.020947","created_date":"2025-10-10T00:00:00"}
