{"id":"https://openalex.org/W2991542629","doi":"https://doi.org/10.1109/icassp40776.2020.9054697","title":"An Analysis of Speech Enhancement and Recognition Losses in Limited Resources Multi-Talker Single Channel Audio-Visual ASR","display_name":"An Analysis of Speech Enhancement and Recognition Losses in Limited Resources Multi-Talker Single Channel Audio-Visual ASR","publication_year":2020,"publication_date":"2020-04-09","ids":{"openalex":"https://openalex.org/W2991542629","doi":"https://doi.org/10.1109/icassp40776.2020.9054697","mag":"2991542629"},"language":"en","primary_location":{"id":"doi:10.1109/icassp40776.2020.9054697","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp40776.2020.9054697","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1904.08248","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5065003610","display_name":"Luca Pasa","orcid":"https://orcid.org/0000-0002-3023-3046"},"institutions":[{"id":"https://openalex.org/I4210133238","display_name":"Center for Translational Neurophysiology of Speech and Communication","ror":"https://ror.org/02nzxcb82","country_code":"IT","type":"facility","lineage":["https://openalex.org/I30771326","https://openalex.org/I4210133238"]},{"id":"https://openalex.org/I30771326","display_name":"Italian Institute of Technology","ror":"https://ror.org/042t93s57","country_code":"IT","type":"facility","lineage":["https://openalex.org/I30771326"]}],"countries":["IT"],"is_corresponding":true,"raw_author_name":"Luca Pasa","raw_affiliation_strings":["Istituto Italiano di Tecnologia,Ferrara,Italy","[Istituto Italiano di Tecnologia, Ferrara, Italy]"],"affiliations":[{"raw_affiliation_string":"Istituto Italiano di Tecnologia,Ferrara,Italy","institution_ids":["https://openalex.org/I4210133238","https://openalex.org/I30771326"]},{"raw_affiliation_string":"[Istituto Italiano di Tecnologia, Ferrara, Italy]","institution_ids":["https://openalex.org/I30771326"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040930642","display_name":"Giovanni Morrone","orcid":"https://orcid.org/0000-0003-2163-1779"},"institutions":[{"id":"https://openalex.org/I122346577","display_name":"University of Modena and Reggio Emilia","ror":"https://ror.org/02d4c4y02","country_code":"IT","type":"education","lineage":["https://openalex.org/I122346577"]},{"id":"https://openalex.org/I4210161797","display_name":"Ferrari (Italy)","ror":"https://ror.org/05p859a12","country_code":"IT","type":"company","lineage":["https://openalex.org/I4210161797"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Giovanni Morrone","raw_affiliation_strings":["University of Modena and Reggio Emilia,Department of Engineering \"Enzo Ferrari\",Modena,Italy","University of Modena and Reggio Emilia Department of Engineering \u201cEnzo Ferrari\u201d Modena Italy"],"affiliations":[{"raw_affiliation_string":"University of Modena and Reggio Emilia,Department of Engineering \"Enzo Ferrari\",Modena,Italy","institution_ids":["https://openalex.org/I122346577","https://openalex.org/I4210161797"]},{"raw_affiliation_string":"University of Modena and Reggio Emilia Department of Engineering \u201cEnzo Ferrari\u201d Modena Italy","institution_ids":["https://openalex.org/I122346577","https://openalex.org/I4210161797"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5063165262","display_name":"Leonardo Badino","orcid":"https://orcid.org/0000-0001-7037-5914"},"institutions":[{"id":"https://openalex.org/I4210133238","display_name":"Center for Translational Neurophysiology of Speech and Communication","ror":"https://ror.org/02nzxcb82","country_code":"IT","type":"facility","lineage":["https://openalex.org/I30771326","https://openalex.org/I4210133238"]},{"id":"https://openalex.org/I30771326","display_name":"Italian Institute of Technology","ror":"https://ror.org/042t93s57","country_code":"IT","type":"facility","lineage":["https://openalex.org/I30771326"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Leonardo Badino","raw_affiliation_strings":["Istituto Italiano di Tecnologia,Ferrara,Italy","[Istituto Italiano di Tecnologia, Ferrara, Italy]"],"affiliations":[{"raw_affiliation_string":"Istituto Italiano di Tecnologia,Ferrara,Italy","institution_ids":["https://openalex.org/I4210133238","https://openalex.org/I30771326"]},{"raw_affiliation_string":"[Istituto Italiano di Tecnologia, Ferrara, Italy]","institution_ids":["https://openalex.org/I30771326"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5065003610"],"corresponding_institution_ids":["https://openalex.org/I30771326","https://openalex.org/I4210133238"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.00798273,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"7309","last_page":"7313"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11233","display_name":"Advanced Adaptive Filtering Techniques","score":0.9912999868392944,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8356426954269409},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.788666307926178},{"id":"https://openalex.org/keywords/phone","display_name":"Phone","score":0.6877555251121521},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.628715991973877},{"id":"https://openalex.org/keywords/timit","display_name":"TIMIT","score":0.5366238355636597},{"id":"https://openalex.org/keywords/channel","display_name":"Channel (broadcasting)","score":0.4707247316837311},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.45580390095710754},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3202113211154938},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.19688892364501953}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8356426954269409},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.788666307926178},{"id":"https://openalex.org/C2778707766","wikidata":"https://www.wikidata.org/wiki/Q202064","display_name":"Phone","level":2,"score":0.6877555251121521},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.628715991973877},{"id":"https://openalex.org/C2778724510","wikidata":"https://www.wikidata.org/wiki/Q7670405","display_name":"TIMIT","level":3,"score":0.5366238355636597},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.4707247316837311},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.45580390095710754},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3202113211154938},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.19688892364501953},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.1109/icassp40776.2020.9054697","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp40776.2020.9054697","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1904.08248","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1904.08248","pdf_url":"https://arxiv.org/pdf/1904.08248","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:2991542629","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/1904.08248.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.1904.08248","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1904.08248","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"},{"id":"doi:10.17023/h6m8-7950","is_oa":true,"landing_page_url":"https://doi.org/10.17023/h6m8-7950","pdf_url":null,"source":{"id":"https://openalex.org/S7407051697","display_name":"IEEE RESOURCE CENTERS","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1904.08248","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1904.08248","pdf_url":"https://arxiv.org/pdf/1904.08248","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":47,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1971168548","https://openalex.org/W1991139021","https://openalex.org/W2015143272","https://openalex.org/W2029199293","https://openalex.org/W2035576074","https://openalex.org/W2057826000","https://openalex.org/W2079735306","https://openalex.org/W2087681821","https://openalex.org/W2115252128","https://openalex.org/W2127141656","https://openalex.org/W2143104527","https://openalex.org/W2397728357","https://openalex.org/W2460742184","https://openalex.org/W2551572271","https://openalex.org/W2558649592","https://openalex.org/W2578229578","https://openalex.org/W2734774145","https://openalex.org/W2742079690","https://openalex.org/W2749510669","https://openalex.org/W2755891984","https://openalex.org/W2890952074","https://openalex.org/W2891833136","https://openalex.org/W2900114706","https://openalex.org/W2900292050","https://openalex.org/W2901702433","https://openalex.org/W2952746495","https://openalex.org/W2962715207","https://openalex.org/W2962935966","https://openalex.org/W2962960500","https://openalex.org/W2963082324","https://openalex.org/W2963477857","https://openalex.org/W2963843276","https://openalex.org/W2964171275","https://openalex.org/W2964243145","https://openalex.org/W2972568703","https://openalex.org/W2973062255","https://openalex.org/W3023071679","https://openalex.org/W3123318516","https://openalex.org/W4289665794","https://openalex.org/W6712476441","https://openalex.org/W6732872814","https://openalex.org/W6742058293","https://openalex.org/W6744261651","https://openalex.org/W6750591037","https://openalex.org/W6754392867","https://openalex.org/W6756222830"],"related_works":["https://openalex.org/W2938662609","https://openalex.org/W3163142165","https://openalex.org/W3169723574","https://openalex.org/W3203028475","https://openalex.org/W3186546663","https://openalex.org/W3088159194","https://openalex.org/W2790326622","https://openalex.org/W2905699315","https://openalex.org/W2572023240","https://openalex.org/W3161697668","https://openalex.org/W3035299099","https://openalex.org/W2115835897","https://openalex.org/W2890505457","https://openalex.org/W2889624961","https://openalex.org/W2888868298","https://openalex.org/W3206706278","https://openalex.org/W2984787691","https://openalex.org/W2972775954","https://openalex.org/W3198413388","https://openalex.org/W2972592847"],"abstract_inverted_index":{"In":[0],"this":[1],"paper,":[2],"we":[3,22,40,135],"analyzed":[4,57,120],"how":[5,42,48],"audio-visual":[6,110],"speech":[7,33,79],"enhancement":[8,34,80],"can":[9],"help":[10],"to":[11,49,108,115],"perform":[12,30,116],"the":[13,43,54,75,78,83,90,94,102,109,121,124,137],"ASR":[14,76],"task":[15,77],"in":[16,133],"a":[17,98],"cocktail":[18],"party":[19],"scenario.":[20],"Therefore":[21],"considered":[23],"two":[24,44,95,129],"simple":[25],"end-to-end":[26],"LSTM-based":[27],"models":[28,45,112,126],"that":[29,61,71],"single-channel":[31],"audiovisual":[32],"and":[35,47,65,87,132,142],"phone":[36,117],"recognition":[37],"respectively.":[38],"Then,":[39],"studied":[41],"interact,":[46],"train":[50],"them":[51],"jointly":[52],"affects":[53],"final":[55],"result.We":[56],"different":[58],"training":[59],"strategies":[60],"reveal":[62],"some":[63],"interesting":[64],"unexpected":[66],"behaviors.":[67],"The":[68],"experiments":[69],"show":[70],"during":[72],"optimization":[73,92],"of":[74,82,93,101,123,140],"capability":[81],"model":[84],"significantly":[85],"decreases":[86],"vice-versa.":[88],"Nevertheless":[89],"joint":[91],"tasks":[96],"shows":[97],"remarkable":[99],"drop":[100],"Phone":[103],"Error":[104],"Rate":[105],"(PER)":[106],"compared":[107],"baseline":[111],"trained":[113],"only":[114],"recognition.":[118],"We":[119],"behaviors":[122],"proposed":[125],"by":[127],"using":[128],"limited-size":[130],"datasets,":[131],"particular":[134],"used":[136],"mixed-speech":[138],"versions":[139],"GRID":[141],"TCD-TIMIT.":[143]},"counts_by_year":[],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
