{"id":"https://openalex.org/W4387772681","doi":"https://doi.org/10.3390/info14100575","title":"Generative Adversarial Networks (GANs) for Audio-Visual Speech Recognition in Artificial Intelligence IoT","display_name":"Generative Adversarial Networks (GANs) for Audio-Visual Speech Recognition in Artificial Intelligence IoT","publication_year":2023,"publication_date":"2023-10-19","ids":{"openalex":"https://openalex.org/W4387772681","doi":"https://doi.org/10.3390/info14100575"},"language":"en","primary_location":{"id":"doi:10.3390/info14100575","is_oa":true,"landing_page_url":"https://doi.org/10.3390/info14100575","pdf_url":"https://www.mdpi.com/2078-2489/14/10/575/pdf?version=1697725848","source":{"id":"https://openalex.org/S4210219776","display_name":"Information","issn_l":"2078-2489","issn":["2078-2489"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Information","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/2078-2489/14/10/575/pdf?version=1697725848","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101865953","display_name":"Yibo He","orcid":"https://orcid.org/0000-0002-6306-0647"},"institutions":[{"id":"https://openalex.org/I69356397","display_name":"Xi\u2019an Jiaotong-Liverpool University","ror":"https://ror.org/03zmrmn05","country_code":"CN","type":"education","lineage":["https://openalex.org/I69356397"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yibo He","raw_affiliation_strings":["School of AI and Advanced Computing, Xi\u2019an Jiaotong Liverpool University, Suzhou 215000, China","School of AI and Advanced Computing, Xi'an Jiaotong Liverpool University, Suzhou 215000, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of AI and Advanced Computing, Xi\u2019an Jiaotong Liverpool University, Suzhou 215000, China","institution_ids":["https://openalex.org/I69356397"]},{"raw_affiliation_string":"School of AI and Advanced Computing, Xi'an Jiaotong Liverpool University, Suzhou 215000, China","institution_ids":["https://openalex.org/I69356397"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111525101","display_name":"Kah Phooi Seng","orcid":null},"institutions":[{"id":"https://openalex.org/I160993911","display_name":"Queensland University of Technology","ror":"https://ror.org/03pnv4752","country_code":"AU","type":"education","lineage":["https://openalex.org/I160993911"]},{"id":"https://openalex.org/I174025329","display_name":"University of the Sunshine Coast","ror":"https://ror.org/016gb9e15","country_code":"AU","type":"education","lineage":["https://openalex.org/I174025329"]},{"id":"https://openalex.org/I69356397","display_name":"Xi\u2019an Jiaotong-Liverpool University","ror":"https://ror.org/03zmrmn05","country_code":"CN","type":"education","lineage":["https://openalex.org/I69356397"]}],"countries":["AU","CN"],"is_corresponding":false,"raw_author_name":"Kah Phooi Seng","raw_affiliation_strings":["School of AI and Advanced Computing, Xi\u2019an Jiaotong Liverpool University, Suzhou 215000, China","School of Computer Science, Queensland University of Technology, Brisbane City, QLD 4000, Australia","School of Science Technology and Engineering, University of the Sunshine Coast, Sippy Downs, QLD 4556, Australia","School of AI and Advanced Computing, Xi'an Jiaotong Liverpool University, Suzhou 215000, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of AI and Advanced Computing, Xi\u2019an Jiaotong Liverpool University, Suzhou 215000, China","institution_ids":["https://openalex.org/I69356397"]},{"raw_affiliation_string":"School of Computer Science, Queensland University of Technology, Brisbane City, QLD 4000, Australia","institution_ids":["https://openalex.org/I160993911"]},{"raw_affiliation_string":"School of Science Technology and Engineering, University of the Sunshine Coast, Sippy Downs, QLD 4556, Australia","institution_ids":["https://openalex.org/I174025329"]},{"raw_affiliation_string":"School of AI and Advanced Computing, Xi'an Jiaotong Liverpool University, Suzhou 215000, China","institution_ids":["https://openalex.org/I69356397"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5073635669","display_name":"Li-Minn Ang","orcid":"https://orcid.org/0000-0002-2402-7529"},"institutions":[{"id":"https://openalex.org/I174025329","display_name":"University of the Sunshine Coast","ror":"https://ror.org/016gb9e15","country_code":"AU","type":"education","lineage":["https://openalex.org/I174025329"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Li Minn Ang","raw_affiliation_strings":["School of Science Technology and Engineering, University of the Sunshine Coast, Sippy Downs, QLD 4556, Australia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Science Technology and Engineering, University of the Sunshine Coast, Sippy Downs, QLD 4556, Australia","institution_ids":["https://openalex.org/I174025329"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5073635669"],"corresponding_institution_ids":["https://openalex.org/I174025329"],"apc_list":{"value":1400,"currency":"CHF","value_usd":1515},"apc_paid":{"value":1400,"currency":"CHF","value_usd":1515},"fwci":4.1188,"has_fulltext":true,"cited_by_count":22,"citation_normalized_percentile":{"value":0.95296815,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"14","issue":"10","first_page":"575","last_page":"575"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12357","display_name":"Digital Media Forensic Detection","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8204976916313171},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5598132014274597},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.44731929898262024},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.44317346811294556}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8204976916313171},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5598132014274597},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44731929898262024},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.44317346811294556},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.3390/info14100575","is_oa":true,"landing_page_url":"https://doi.org/10.3390/info14100575","pdf_url":"https://www.mdpi.com/2078-2489/14/10/575/pdf?version=1697725848","source":{"id":"https://openalex.org/S4210219776","display_name":"Information","issn_l":"2078-2489","issn":["2078-2489"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Information","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:90bbfd48eb1c44e491d8fd5e4eb26595","is_oa":true,"landing_page_url":"https://doaj.org/article/90bbfd48eb1c44e491d8fd5e4eb26595","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Information, Vol 14, Iss 10, p 575 (2023)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.3390/info14100575","is_oa":true,"landing_page_url":"https://doi.org/10.3390/info14100575","pdf_url":"https://www.mdpi.com/2078-2489/14/10/575/pdf?version=1697725848","source":{"id":"https://openalex.org/S4210219776","display_name":"Information","issn_l":"2078-2489","issn":["2078-2489"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Information","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Affordable and clean energy","score":0.9200000166893005,"id":"https://metadata.un.org/sdg/7"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4387772681.pdf"},"referenced_works_count":37,"referenced_works":["https://openalex.org/W589973208","https://openalex.org/W1849277567","https://openalex.org/W2007339694","https://openalex.org/W2070715707","https://openalex.org/W2105103777","https://openalex.org/W2117539524","https://openalex.org/W2121486117","https://openalex.org/W2124964692","https://openalex.org/W2155934075","https://openalex.org/W2163541010","https://openalex.org/W2248617261","https://openalex.org/W2345891197","https://openalex.org/W2607041014","https://openalex.org/W2618398196","https://openalex.org/W2622064152","https://openalex.org/W2783301370","https://openalex.org/W2890952074","https://openalex.org/W2891158090","https://openalex.org/W2898211994","https://openalex.org/W2900668286","https://openalex.org/W2921931055","https://openalex.org/W2946005997","https://openalex.org/W2962770929","https://openalex.org/W2962793481","https://openalex.org/W2962974533","https://openalex.org/W2963470893","https://openalex.org/W2963767194","https://openalex.org/W2972775954","https://openalex.org/W2981501041","https://openalex.org/W3035574324","https://openalex.org/W3043547428","https://openalex.org/W3081492798","https://openalex.org/W3101631197","https://openalex.org/W3101639073","https://openalex.org/W3180391059","https://openalex.org/W6678856934","https://openalex.org/W7011482893"],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W2382290278","https://openalex.org/W2350741829","https://openalex.org/W2530322880","https://openalex.org/W1596801655","https://openalex.org/W2359140296"],"abstract_inverted_index":{"This":[0],"paper":[1],"proposes":[2],"a":[3,40,108,111,117,191],"novel":[4],"multimodal":[5,42,81,101,170],"generative":[6],"adversarial":[7],"network":[8],"AVSR":[9,11,22,66,102,171,176],"(multimodal":[10],"GAN)":[12],"architecture,":[13],"to":[14,91,121],"improve":[15],"both":[16],"the":[17,21,93,122,127,150,158,168,175,185,195],"energy":[18],"efficiency":[19],"and":[20,50,71,110,126,147,153,163,189],"classification":[23,177],"accuracy":[24],"of":[25,29,54,80,96,107,114,187,194],"artificial":[26],"intelligence":[27],"Internet":[28],"things":[30],"(IoT)":[31],"applications.":[32],"The":[33,78,99,161],"audio-visual":[34],"speech":[35,60],"recognition":[36,61],"(AVSR)":[37],"modality":[38],"is":[39,45,105,116],"classical":[41],"modality,":[43],"which":[44,115],"commonly":[46],"used":[47,137],"in":[48,67,149,180],"IoT":[49,56,85,97],"embedded":[51],"systems.":[52],"Examples":[53],"suitable":[55],"applications":[57,73,86],"include":[58],"in-cabin":[59],"systems":[62],"for":[63,84],"driving":[64],"systems,":[65],"augmented":[68,138],"reality":[69],"environments,":[70],"interactive":[72],"such":[74],"as":[75],"virtual":[76],"aquariums.":[77],"application":[79],"sensor":[82],"data":[83,139],"requires":[87],"efficient":[88],"information":[89,125],"processing,":[90],"meet":[92],"hardware":[94],"constraints":[95],"devices.":[98],"proposed":[100,169,196],"GAN":[103,172],"architecture":[104,173],"composed":[106],"discriminator":[109],"generator,":[112],"each":[113],"two-stream":[118],"network,":[119],"corresponding":[120],"audio":[123],"stream":[124,129],"visual":[128],"information,":[130],"respectively.":[131],"To":[132],"validate":[133],"this":[134,181],"approach,":[135],"we":[136,183],"from":[140],"well-known":[141],"datasets":[142],"(LRS2-Lip":[143],"Reading":[144],"Sentences":[145],"2":[146],"LRS3)":[148],"training":[151],"process,":[152],"testing":[154],"was":[155],"performed":[156],"using":[157],"original":[159],"data.":[160],"research":[162],"experimental":[164],"results":[165],"showed":[166],"that":[167],"improved":[174],"accuracy.":[178],"Furthermore,":[179],"study,":[182],"discuss":[184],"domain":[186],"GANs":[188],"provide":[190],"concise":[192],"summary":[193],"GANs.":[197]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":11},{"year":2024,"cited_by_count":8},{"year":2023,"cited_by_count":1}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
