{"id":"https://openalex.org/W3216337268","doi":"https://doi.org/10.1007/s10994-021-06112-5","title":"Bimodal variational autoencoder for audiovisual speech recognition","display_name":"Bimodal variational autoencoder for audiovisual speech recognition","publication_year":2021,"publication_date":"2021-11-24","ids":{"openalex":"https://openalex.org/W3216337268","doi":"https://doi.org/10.1007/s10994-021-06112-5","mag":"3216337268"},"language":"en","primary_location":{"id":"doi:10.1007/s10994-021-06112-5","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10994-021-06112-5","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10994-021-06112-5.pdf","source":{"id":"https://openalex.org/S62148650","display_name":"Machine Learning","issn_l":"0885-6125","issn":["0885-6125","1573-0565"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/s10994-021-06112-5.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5079842397","display_name":"Hadeer M. Sayed","orcid":null},"institutions":[{"id":"https://openalex.org/I66513531","display_name":"Fayoum University","ror":"https://ror.org/023gzwx10","country_code":"EG","type":"education","lineage":["https://openalex.org/I66513531"]}],"countries":["EG"],"is_corresponding":true,"raw_author_name":"Hadeer M. Sayed","raw_affiliation_strings":["Department of Computer Science, Fayoum University, Fayoum, Egypt"],"raw_orcid":"https://orcid.org/0000-0002-6136-4823","affiliations":[{"raw_affiliation_string":"Department of Computer Science, Fayoum University, Fayoum, Egypt","institution_ids":["https://openalex.org/I66513531"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017706445","display_name":"Hesham Eldeeb","orcid":"https://orcid.org/0000-0001-9669-5991"},"institutions":[{"id":"https://openalex.org/I4210156128","display_name":"Electronics Research Institute","ror":"https://ror.org/0532wcf75","country_code":"EG","type":"facility","lineage":["https://openalex.org/I4210094263","https://openalex.org/I4210156128"]}],"countries":["EG"],"is_corresponding":false,"raw_author_name":"Hesham E. ElDeeb","raw_affiliation_strings":["Department of Computer and Control, Electronics Research Institute, Cairo, Egypt"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer and Control, Electronics Research Institute, Cairo, Egypt","institution_ids":["https://openalex.org/I4210156128"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5033608640","display_name":"Shereen A. Taie","orcid":null},"institutions":[{"id":"https://openalex.org/I66513531","display_name":"Fayoum University","ror":"https://ror.org/023gzwx10","country_code":"EG","type":"education","lineage":["https://openalex.org/I66513531"]}],"countries":["EG"],"is_corresponding":false,"raw_author_name":"Shereen A. Taie","raw_affiliation_strings":["Department of Computer Science, Fayoum University, Fayoum, Egypt"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Fayoum University, Fayoum, Egypt","institution_ids":["https://openalex.org/I66513531"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5079842397"],"corresponding_institution_ids":["https://openalex.org/I66513531"],"apc_list":{"value":2390,"currency":"EUR","value_usd":2990},"apc_paid":{"value":2390,"currency":"EUR","value_usd":2990},"fwci":2.159,"has_fulltext":true,"cited_by_count":21,"citation_normalized_percentile":{"value":0.88546963,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"112","issue":"4","first_page":"1201","last_page":"1226"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9882000088691711,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.7337332367897034},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.7194417715072632},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7015489339828491},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.6757622957229614},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6350769996643066},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6329388618469238},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5825726985931396},{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.539371132850647},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.4752987325191498},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.47432631254196167},{"id":"https://openalex.org/keywords/fusion","display_name":"Fusion","score":0.4670257568359375},{"id":"https://openalex.org/keywords/signal","display_name":"SIGNAL (programming language)","score":0.44478142261505127},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.41556161642074585},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3349294662475586}],"concepts":[{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.7337332367897034},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.7194417715072632},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7015489339828491},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.6757622957229614},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6350769996643066},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6329388618469238},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5825726985931396},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.539371132850647},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.4752987325191498},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.47432631254196167},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.4670257568359375},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.44478142261505127},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.41556161642074585},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3349294662475586},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1007/s10994-021-06112-5","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10994-021-06112-5","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10994-021-06112-5.pdf","source":{"id":"https://openalex.org/S62148650","display_name":"Machine Learning","issn_l":"0885-6125","issn":["0885-6125","1573-0565"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1007/s10994-021-06112-5","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10994-021-06112-5","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10994-021-06112-5.pdf","source":{"id":"https://openalex.org/S62148650","display_name":"Machine Learning","issn_l":"0885-6125","issn":["0885-6125","1573-0565"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3216337268.pdf","grobid_xml":"https://content.openalex.org/works/W3216337268.grobid-xml"},"referenced_works_count":69,"referenced_works":["https://openalex.org/W22229905","https://openalex.org/W107619411","https://openalex.org/W141678749","https://openalex.org/W1498436455","https://openalex.org/W1524333225","https://openalex.org/W1526392145","https://openalex.org/W1566256432","https://openalex.org/W1959608418","https://openalex.org/W1972340877","https://openalex.org/W1977863971","https://openalex.org/W2016661008","https://openalex.org/W2026012689","https://openalex.org/W2031614119","https://openalex.org/W2058961190","https://openalex.org/W2064675550","https://openalex.org/W2087681821","https://openalex.org/W2096391593","https://openalex.org/W2098923380","https://openalex.org/W2102409316","https://openalex.org/W2110798204","https://openalex.org/W2116064496","https://openalex.org/W2119821739","https://openalex.org/W2122538988","https://openalex.org/W2148154194","https://openalex.org/W2157331557","https://openalex.org/W2158559550","https://openalex.org/W2161969291","https://openalex.org/W2184188583","https://openalex.org/W2227969856","https://openalex.org/W2253677904","https://openalex.org/W2325939864","https://openalex.org/W2341069961","https://openalex.org/W2415856357","https://openalex.org/W2467604901","https://openalex.org/W2470413457","https://openalex.org/W2514741789","https://openalex.org/W2557283755","https://openalex.org/W2613677041","https://openalex.org/W2618530766","https://openalex.org/W2619383789","https://openalex.org/W2747876639","https://openalex.org/W2755889034","https://openalex.org/W2809254203","https://openalex.org/W2885130944","https://openalex.org/W2890952074","https://openalex.org/W2902784055","https://openalex.org/W2935794029","https://openalex.org/W2963173382","https://openalex.org/W2963192365","https://openalex.org/W2963641944","https://openalex.org/W2963839617","https://openalex.org/W2976594877","https://openalex.org/W2982391067","https://openalex.org/W3010398094","https://openalex.org/W3011727199","https://openalex.org/W3014849434","https://openalex.org/W3015383493","https://openalex.org/W3081597552","https://openalex.org/W3104211051","https://openalex.org/W3143803406","https://openalex.org/W3154825838","https://openalex.org/W4239510810","https://openalex.org/W4245749767","https://openalex.org/W6631362777","https://openalex.org/W6640963894","https://openalex.org/W6682648773","https://openalex.org/W6686207219","https://openalex.org/W6730267373","https://openalex.org/W6786176549"],"related_works":["https://openalex.org/W73545470","https://openalex.org/W4224266612","https://openalex.org/W2383394264","https://openalex.org/W4320153225","https://openalex.org/W4293261942","https://openalex.org/W3125968744","https://openalex.org/W2004831463","https://openalex.org/W2167701463","https://openalex.org/W2110287964","https://openalex.org/W4307407935"],"abstract_inverted_index":{"Multimodal":[0],"fusion":[1,19,135],"is":[2,20,63,68,114,178,187],"the":[3,23,55,73,80,97,100,104,125,128,137,158,162,174,183,192],"idea":[4],"of":[5,12,17,25,99,106,127,132,164],"combining":[6],"information":[7],"in":[8,49,103,161],"a":[9,35,50],"joint":[10],"representation":[11],"multiple":[13],"modalities.":[14],"The":[15,65,94,121],"goal":[16],"multimodal":[18],"to":[21],"improve":[22],"accuracy":[24,143,168,195],"results":[26,123],"from":[27],"classification":[28],"or":[29,112],"regression":[30],"tasks.":[31],"This":[32],"paper":[33],"proposes":[34],"Bimodal":[36],"Variational":[37],"Autoencoder":[38],"(BiVAE)":[39,131],"model":[40,67,130],"for":[41,150],"audiovisual":[42,47,82,133],"features":[43,102,134],"fusion.":[44],"Reliance":[45],"on":[46,72],"signals":[48],"speech":[51],"recognition":[52,56,194],"task":[53],"increases":[54],"accuracy,":[57],"especially":[58],"when":[59,173,182],"an":[60,141,167],"audio":[61,176],"signal":[62,177,186],"corrupted.":[64],"BiVAE":[66,156],"trained":[69],"and":[70,90,148,152,180],"validated":[71],"CUAVE":[74],"dataset.":[75],"Three":[76],"classifiers":[77],"have":[78],"evaluated":[79],"fused":[81,101],"features:":[83],"Long-short":[84],"Term":[85],"Memory,":[86],"Deep":[87],"Neural":[88],"Network,":[89],"Support":[91],"Vector":[92],"Machine.":[93],"experiment":[95],"involves":[96],"evaluation":[98],"case":[105,163],"whether":[107],"two":[108],"modalities":[109],"are":[110],"available":[111,118,179],"there":[113],"only":[115,175,184],"one":[116],"modality":[117],"(i.e.,":[119],"cross-modality).":[120],"experimental":[122],"display":[124],"superiority":[126],"proposed":[129],"over":[136],"state-of-the-art":[138,159],"models":[139,160],"by":[140,166],"average":[142],"difference":[144,169],"$$\\simeq":[145,170],"$$":[146,171],"3.28%":[147],"13.28%":[149],"clean":[151],"noisy,":[153],"respectively.":[154],"Additionally,":[155],"outperforms":[157],"cross-modality":[165],"2.79%":[172],"1.88%":[181],"video":[185],"available.":[188],"Furthermore,":[189],"SVM":[190],"satisfies":[191],"best":[193],"compared":[196],"with":[197],"other":[198],"classifiers.":[199]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":7},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
