{"id":"https://openalex.org/W4410226946","doi":"https://doi.org/10.1109/jstsp.2025.3568585","title":"Enhanced Multimodal Speech Processing for Healthcare Applications: A Deep Fusion Approach","display_name":"Enhanced Multimodal Speech Processing for Healthcare Applications: A Deep Fusion Approach","publication_year":2025,"publication_date":"2025-05-01","ids":{"openalex":"https://openalex.org/W4410226946","doi":"https://doi.org/10.1109/jstsp.2025.3568585"},"language":"en","primary_location":{"id":"doi:10.1109/jstsp.2025.3568585","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jstsp.2025.3568585","pdf_url":null,"source":{"id":"https://openalex.org/S42167783","display_name":"IEEE Journal of Selected Topics in Signal Processing","issn_l":"1932-4553","issn":["1932-4553","1941-0484"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Journal of Selected Topics in Signal Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Jianhui Lv","orcid":"https://orcid.org/0000-0003-0884-6601"},"institutions":[{"id":"https://openalex.org/I85430964","display_name":"Jinzhou Medical University","ror":"https://ror.org/02yd1yr68","country_code":"CN","type":"education","lineage":["https://openalex.org/I85430964"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jianhui Lv","raw_affiliation_strings":["The First Affiliated Hospital of Jinzhou Medical University, Jinzhou, China"],"affiliations":[{"raw_affiliation_string":"The First Affiliated Hospital of Jinzhou Medical University, Jinzhou, China","institution_ids":["https://openalex.org/I85430964"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042123158","display_name":"Wadii Boulila","orcid":"https://orcid.org/0000-0003-2133-0757"},"institutions":[{"id":"https://openalex.org/I142024983","display_name":"Prince Sultan University","ror":"https://ror.org/053mqrf26","country_code":"SA","type":"education","lineage":["https://openalex.org/I142024983"]}],"countries":["SA"],"is_corresponding":false,"raw_author_name":"Wadii Boulila","raw_affiliation_strings":["Robotics and Internet-of-Things Laboratory, Prince Sultan University, Riyadh, Saudi Arabia"],"affiliations":[{"raw_affiliation_string":"Robotics and Internet-of-Things Laboratory, Prince Sultan University, Riyadh, Saudi Arabia","institution_ids":["https://openalex.org/I142024983"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004251248","display_name":"Shalli Rani","orcid":"https://orcid.org/0000-0002-8474-9435"},"institutions":[{"id":"https://openalex.org/I74319210","display_name":"Chitkara University","ror":"https://ror.org/057d6z539","country_code":"IN","type":"education","lineage":["https://openalex.org/I74319210"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Shalli Rani","raw_affiliation_strings":["Chitkara University Institute of Engineering and Technology, Chitkara University, Punjab, India"],"affiliations":[{"raw_affiliation_string":"Chitkara University Institute of Engineering and Technology, Chitkara University, Punjab, India","institution_ids":["https://openalex.org/I74319210"]}]},{"author_position":"last","author":{"id":null,"display_name":"Huamao Jiang","orcid":"https://orcid.org/0009-0008-7936-9041"},"institutions":[{"id":"https://openalex.org/I85430964","display_name":"Jinzhou Medical University","ror":"https://ror.org/02yd1yr68","country_code":"CN","type":"education","lineage":["https://openalex.org/I85430964"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huamao Jiang","raw_affiliation_strings":["The First Affiliated Hospital of Jinzhou Medical University, Jinzhou, China"],"affiliations":[{"raw_affiliation_string":"The First Affiliated Hospital of Jinzhou Medical University, Jinzhou, China","institution_ids":["https://openalex.org/I85430964"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I85430964"],"apc_list":null,"apc_paid":null,"fwci":2.3568,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.88965202,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"19","issue":"4","first_page":"600","last_page":"612"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.8662999868392944,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.8662999868392944,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.7480999827384949,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6721205115318298},{"id":"https://openalex.org/keywords/health-care","display_name":"Health care","score":0.5626323223114014},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.48546937108039856},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4597058892250061},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.41518086194992065}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6721205115318298},{"id":"https://openalex.org/C160735492","wikidata":"https://www.wikidata.org/wiki/Q31207","display_name":"Health care","level":2,"score":0.5626323223114014},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.48546937108039856},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4597058892250061},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.41518086194992065},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/jstsp.2025.3568585","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jstsp.2025.3568585","pdf_url":null,"source":{"id":"https://openalex.org/S42167783","display_name":"IEEE Journal of Selected Topics in Signal Processing","issn_l":"1932-4553","issn":["1932-4553","1941-0484"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Journal of Selected Topics in Signal Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G6402621496","display_name":null,"funder_award_id":"62202247","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":38,"referenced_works":["https://openalex.org/W2945191446","https://openalex.org/W3116242343","https://openalex.org/W3119048198","https://openalex.org/W4226107067","https://openalex.org/W4229003981","https://openalex.org/W4294892047","https://openalex.org/W4295185249","https://openalex.org/W4297504231","https://openalex.org/W4303983745","https://openalex.org/W4308977069","https://openalex.org/W4309573071","https://openalex.org/W4311322277","https://openalex.org/W4313251276","https://openalex.org/W4313598803","https://openalex.org/W4321749508","https://openalex.org/W4322744472","https://openalex.org/W4360600216","https://openalex.org/W4360620260","https://openalex.org/W4362667966","https://openalex.org/W4377292302","https://openalex.org/W4377990899","https://openalex.org/W4379984888","https://openalex.org/W4380202601","https://openalex.org/W4384303965","https://openalex.org/W4385800720","https://openalex.org/W4386223611","https://openalex.org/W4386234565","https://openalex.org/W4386920535","https://openalex.org/W4387215177","https://openalex.org/W4387520297","https://openalex.org/W4388570656","https://openalex.org/W4388620484","https://openalex.org/W4391963716","https://openalex.org/W4392131573","https://openalex.org/W4394730920","https://openalex.org/W4395471007","https://openalex.org/W4402916065","https://openalex.org/W4404031727"],"related_works":["https://openalex.org/W2981428355","https://openalex.org/W1834994814","https://openalex.org/W2041273198","https://openalex.org/W1599055764","https://openalex.org/W2131711534","https://openalex.org/W2149163000","https://openalex.org/W2962858469","https://openalex.org/W2289873871","https://openalex.org/W2559040841","https://openalex.org/W114661351"],"abstract_inverted_index":{"Communication":[0],"in":[1,11,33,84],"healthcare":[2,20],"settings":[3],"is":[4],"sometimes":[5],"affected":[6],"by":[7,36],"ambient":[8],"noise,":[9],"resulting":[10],"possible":[12],"misunderstanding":[13],"of":[14,79,118,137,155],"essential":[15],"information.":[16],"We":[17],"introduce":[18],"the":[19,77,110,116,119],"audio-visual":[21],"deep":[22],"fusion":[23,72],"(HAV-DF)":[24],"model,":[25],"an":[26,69,92],"innovative":[27,93],"method":[28,73],"that":[29,57,74,96],"improves":[30],"speech":[31],"comprehension":[32],"clinical":[34],"environments":[35],"intelligently":[37],"merging":[38],"acoustic":[39,159],"and":[40,81,112],"visual":[41,60,82],"data.":[42],"The":[43,147],"HAV-DF":[44,132],"model":[45,121,148],"has":[46],"three":[47],"key":[48],"advancements.":[49],"First,":[50],"it":[51,67,90],"utilizes":[52],"a":[53,134,140,150],"medical":[54,64,105,151],"video":[55],"interface":[56],"collects":[58],"nuanced":[59],"signals":[61],"pertinent":[62],"to":[63,86,100,144,178],"communication.":[65],"Then,":[66],"employs":[68,91],"advanced":[70],"multimodal":[71],"adaptively":[75],"modifies":[76],"integration":[78],"auditory":[80],"data":[83],"response":[85],"noisy":[87],"situations.":[88,126],"Finally,":[89],"loss":[94],"function":[95],"integrates":[97],"healthcare-specific":[98],"indicators":[99],"increase":[101],"voice":[102],"optimization":[103],"for":[104],"applications.":[106],"Experimental":[107],"findings":[108],"on":[109],"MedDialog":[111],"MedVidQA":[113],"datasets":[114],"illustrate":[115],"efficacy":[117,122],"proposed":[120],"under":[123,157],"diverse":[124],"noise":[125],"In":[127],"low":[128],"SNR":[129],"situations":[130],"(-5dB),":[131],"attains":[133],"PESQ":[135],"score":[136],"2.45,":[138],"indicating":[139],"25%":[141],"enhancement":[142],"compared":[143],"leading":[145],"approaches.":[146],"achieves":[149],"term":[152],"preservation":[153],"rate":[154],"93.18%":[156],"difficult":[158],"settings,":[160],"markedly":[161],"surpassing":[162],"current":[163],"methodologies.":[164],"These":[165],"enhancements":[166],"provide":[167],"more":[168],"dependable":[169],"communication":[170],"across":[171],"many":[172],"therapeutic":[173],"contexts,":[174],"from":[175],"emergency":[176],"departments":[177],"telemedicine":[179],"consultations.":[180]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
