{"id":"https://openalex.org/W7129651857","doi":"https://doi.org/10.1109/icipw68931.2025.11385918","title":"Cross-Modal Contrastive Learning with Attention for Enhanced Facial Feature Extraction in Multimodal Emotion Recognition","display_name":"Cross-Modal Contrastive Learning with Attention for Enhanced Facial Feature Extraction in Multimodal Emotion Recognition","publication_year":2025,"publication_date":"2025-09-14","ids":{"openalex":"https://openalex.org/W7129651857","doi":"https://doi.org/10.1109/icipw68931.2025.11385918"},"language":null,"primary_location":{"id":"doi:10.1109/icipw68931.2025.11385918","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icipw68931.2025.11385918","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Image Processing Workshops (ICIPW)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5056714285","display_name":"Miaohong Shi","orcid":null},"institutions":[{"id":"https://openalex.org/I4210100005","display_name":"Silicon Technologies (United States)","ror":"https://ror.org/013qwzt07","country_code":"US","type":"company","lineage":["https://openalex.org/I4210100005"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Miaohong Shi","raw_affiliation_strings":["Reconova Technologies Co., Ltd,China"],"affiliations":[{"raw_affiliation_string":"Reconova Technologies Co., Ltd,China","institution_ids":["https://openalex.org/I4210100005"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5125257983","display_name":"Baozhi Jia","orcid":null},"institutions":[{"id":"https://openalex.org/I4210100005","display_name":"Silicon Technologies (United States)","ror":"https://ror.org/013qwzt07","country_code":"US","type":"company","lineage":["https://openalex.org/I4210100005"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Baozhi Jia","raw_affiliation_strings":["Reconova Technologies Co., Ltd,China"],"affiliations":[{"raw_affiliation_string":"Reconova Technologies Co., Ltd,China","institution_ids":["https://openalex.org/I4210100005"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5056714285"],"corresponding_institution_ids":["https://openalex.org/I4210100005"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.81662322,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"540","last_page":"544"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.9779000282287598,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.9779000282287598,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.006200000178068876,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10709","display_name":"Social Robot Interaction and HRI","score":0.00279999990016222,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.5453000068664551},{"id":"https://openalex.org/keywords/emotion-recognition","display_name":"Emotion recognition","score":0.5264000296592712},{"id":"https://openalex.org/keywords/salient","display_name":"Salient","score":0.5005000233650208},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.44780001044273376},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4221000075340271},{"id":"https://openalex.org/keywords/facial-recognition-system","display_name":"Facial recognition system","score":0.41260001063346863},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.36719998717308044}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6751000285148621},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.5453000068664551},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5407999753952026},{"id":"https://openalex.org/C2777438025","wikidata":"https://www.wikidata.org/wiki/Q1339090","display_name":"Emotion recognition","level":2,"score":0.5264000296592712},{"id":"https://openalex.org/C2780719617","wikidata":"https://www.wikidata.org/wiki/Q1030752","display_name":"Salient","level":2,"score":0.5005000233650208},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.44780001044273376},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.44190001487731934},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4221000075340271},{"id":"https://openalex.org/C31510193","wikidata":"https://www.wikidata.org/wiki/Q1192553","display_name":"Facial recognition system","level":3,"score":0.41260001063346863},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.36719998717308044},{"id":"https://openalex.org/C195704467","wikidata":"https://www.wikidata.org/wiki/Q327968","display_name":"Facial expression","level":2,"score":0.3424000144004822},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.32839998602867126},{"id":"https://openalex.org/C207347870","wikidata":"https://www.wikidata.org/wiki/Q371174","display_name":"Gesture","level":2,"score":0.3248000144958496},{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.32260000705718994},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.299699991941452},{"id":"https://openalex.org/C2777629044","wikidata":"https://www.wikidata.org/wiki/Q614959","display_name":"Contrastive analysis","level":2,"score":0.2874000072479248},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.26919999718666077},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.26170000433921814},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.2596000134944916},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.2531999945640564}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icipw68931.2025.11385918","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icipw68931.2025.11385918","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Image Processing Workshops (ICIPW)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":10,"referenced_works":["https://openalex.org/W2146334809","https://openalex.org/W2516508966","https://openalex.org/W2518587879","https://openalex.org/W2883409523","https://openalex.org/W3176463841","https://openalex.org/W4317358019","https://openalex.org/W4362721809","https://openalex.org/W4400409555","https://openalex.org/W4402419701","https://openalex.org/W4410242506"],"related_works":[],"abstract_inverted_index":{"We":[0],"propose":[1],"a":[2,24,28,35],"cross-modal":[3,45,74],"contrastive":[4,41],"learning":[5],"framework":[6],"with":[7,27],"attention":[8,46],"to":[9],"enhance":[10],"facial":[11,32,50],"feature":[12],"extraction":[13],"for":[14,31,37],"multimodal":[15],"emotion":[16,66],"recognition":[17,67],"in":[18,65],"service":[19],"robots.":[20],"The":[21],"method":[22],"uses":[23],"dual-branch":[25],"encoder":[26],"Vision":[29],"Transformer":[30],"images":[33],"and":[34,69],"Conformer":[36],"audio":[38,54],"spectrograms,optimized":[39],"via":[40],"learning.":[42],"A":[43],"patch-level":[44],"mechanism":[47],"dynamically":[48],"weights":[49],"regions":[51],"based":[52],"on":[53,57],"relevance,":[55],"focusing":[56],"emotionally":[58],"salient":[59],"features.":[60],"Experiments":[61],"show":[62],"significant":[63],"improvements":[64],"accuracy":[68],"provide":[70],"interpretable":[71],"insights":[72],"into":[73],"interactions.":[75]},"counts_by_year":[],"updated_date":"2026-02-19T06:27:42.648592","created_date":"2026-02-18T00:00:00"}
