{"id":"https://openalex.org/W4415707908","doi":"https://doi.org/10.1109/icme59968.2025.11208908","title":"Multimodal Representation Learning Techniques for Comprehensive Facial State Analysis","display_name":"Multimodal Representation Learning Techniques for Comprehensive Facial State Analysis","publication_year":2025,"publication_date":"2025-06-30","ids":{"openalex":"https://openalex.org/W4415707908","doi":"https://doi.org/10.1109/icme59968.2025.11208908"},"language":"en","primary_location":{"id":"doi:10.1109/icme59968.2025.11208908","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme59968.2025.11208908","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://eprints.gla.ac.uk/view/author/76185.html>,","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Kaiwen Zheng","orcid":null},"institutions":[{"id":"https://openalex.org/I7882870","display_name":"University of Glasgow","ror":"https://ror.org/00vtgdb53","country_code":"GB","type":"education","lineage":["https://openalex.org/I7882870"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Kaiwen Zheng","raw_affiliation_strings":["University of Glasgow,School of Computing Science,Glasgow,United Kingdom"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Glasgow,School of Computing Science,Glasgow,United Kingdom","institution_ids":["https://openalex.org/I7882870"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026473068","display_name":"Xuri Ge","orcid":"https://orcid.org/0000-0002-3925-4951"},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4210164862","display_name":"Artificial Intelligence in Medicine (Canada)","ror":"https://ror.org/05p590m36","country_code":"CA","type":"company","lineage":["https://openalex.org/I4210164862"]}],"countries":["CA","CN"],"is_corresponding":false,"raw_author_name":"Xuri Ge","raw_affiliation_strings":["Shandong University,School of Artificial Intelligence,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Shandong University,School of Artificial Intelligence,China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4210164862"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012498345","display_name":"Junchen Fu","orcid":"https://orcid.org/0000-0003-4759-2042"},"institutions":[{"id":"https://openalex.org/I7882870","display_name":"University of Glasgow","ror":"https://ror.org/00vtgdb53","country_code":"GB","type":"education","lineage":["https://openalex.org/I7882870"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Junchen Fu","raw_affiliation_strings":["University of Glasgow,School of Computing Science,Glasgow,United Kingdom"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Glasgow,School of Computing Science,Glasgow,United Kingdom","institution_ids":["https://openalex.org/I7882870"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001364813","display_name":"Jun Peng","orcid":"https://orcid.org/0000-0002-0404-1956"},"institutions":[{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jun Peng","raw_affiliation_strings":["Peng Cheng Laboratory,Shenzhen,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Peng Cheng Laboratory,Shenzhen,China","institution_ids":["https://openalex.org/I4210136793"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5069702331","display_name":"Joemon M. Jose","orcid":"https://orcid.org/0000-0001-9228-1759"},"institutions":[{"id":"https://openalex.org/I7882870","display_name":"University of Glasgow","ror":"https://ror.org/00vtgdb53","country_code":"GB","type":"education","lineage":["https://openalex.org/I7882870"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Joemon M. Jose","raw_affiliation_strings":["University of Glasgow,School of Computing Science,Glasgow,United Kingdom"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Glasgow,School of Computing Science,Glasgow,United Kingdom","institution_ids":["https://openalex.org/I7882870"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.28872442,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.9032999873161316,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.9032999873161316,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.04670000076293945,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11094","display_name":"Face Recognition and Perception","score":0.021900000050663948,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5555999875068665},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5476999878883362},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4952000081539154},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.4528999924659729},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.44190001487731934},{"id":"https://openalex.org/keywords/facial-expression","display_name":"Facial expression","score":0.42170000076293945},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.41920000314712524},{"id":"https://openalex.org/keywords/multimodal-learning","display_name":"Multimodal learning","score":0.4178999960422516},{"id":"https://openalex.org/keywords/face","display_name":"Face (sociological concept)","score":0.40459999442100525}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7906000018119812},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6008999943733215},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5555999875068665},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5476999878883362},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4952000081539154},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.4528999924659729},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.44190001487731934},{"id":"https://openalex.org/C195704467","wikidata":"https://www.wikidata.org/wiki/Q327968","display_name":"Facial expression","level":2,"score":0.42170000076293945},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.41920000314712524},{"id":"https://openalex.org/C2780660688","wikidata":"https://www.wikidata.org/wiki/Q25052564","display_name":"Multimodal learning","level":2,"score":0.4178999960422516},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.40860000252723694},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.40459999442100525},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.3894999921321869},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3578000068664551},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.33390000462532043},{"id":"https://openalex.org/C101814296","wikidata":"https://www.wikidata.org/wiki/Q5439685","display_name":"Feature model","level":3,"score":0.3337000012397766},{"id":"https://openalex.org/C31510193","wikidata":"https://www.wikidata.org/wiki/Q1192553","display_name":"Facial recognition system","level":3,"score":0.322299987077713},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.3140000104904175},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.31029999256134033},{"id":"https://openalex.org/C135641252","wikidata":"https://www.wikidata.org/wiki/Q738567","display_name":"Multimodal interaction","level":2,"score":0.2782000005245209},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.2702000141143799},{"id":"https://openalex.org/C2780966255","wikidata":"https://www.wikidata.org/wiki/Q5474306","display_name":"Foundation (evidence)","level":2,"score":0.26809999346733093},{"id":"https://openalex.org/C178253425","wikidata":"https://www.wikidata.org/wiki/Q162668","display_name":"Visual perception","level":3,"score":0.26660001277923584},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.266400009393692},{"id":"https://openalex.org/C144986985","wikidata":"https://www.wikidata.org/wiki/Q871236","display_name":"Hierarchical database model","level":2,"score":0.26510000228881836},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.2581999897956848},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.2572999894618988},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.25699999928474426},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.25540000200271606}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/icme59968.2025.11208908","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme59968.2025.11208908","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"},{"id":"pmh:oai:eprints.gla.ac.uk:352325","is_oa":true,"landing_page_url":"https://eprints.gla.ac.uk/view/author/76185.html>,","pdf_url":null,"source":{"id":"https://openalex.org/S4210235606","display_name":"ENLIGHTEN (Jurnal Bimbingan dan Konseling Islam)","issn_l":"2622-8912","issn":["2622-8912","2622-8920"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":null,"raw_type":"PeerReviewed"}],"best_oa_location":{"id":"pmh:oai:eprints.gla.ac.uk:352325","is_oa":true,"landing_page_url":"https://eprints.gla.ac.uk/view/author/76185.html>,","pdf_url":null,"source":{"id":"https://openalex.org/S4210235606","display_name":"ENLIGHTEN (Jurnal Bimbingan dan Konseling Islam)","issn_l":"2622-8912","issn":["2622-8912","2622-8920"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":null,"raw_type":"PeerReviewed"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W2045472600","https://openalex.org/W2051297709","https://openalex.org/W2083021723","https://openalex.org/W2103943262","https://openalex.org/W2436394355","https://openalex.org/W2502450975","https://openalex.org/W2587982884","https://openalex.org/W2734289922","https://openalex.org/W2745497104","https://openalex.org/W2790776461","https://openalex.org/W2792578050","https://openalex.org/W2889978276","https://openalex.org/W2942489367","https://openalex.org/W2949662773","https://openalex.org/W2953843303","https://openalex.org/W3000361961","https://openalex.org/W3039156183","https://openalex.org/W3093370878","https://openalex.org/W3108527469","https://openalex.org/W3182710365","https://openalex.org/W4210792305","https://openalex.org/W4225568094","https://openalex.org/W4285601347","https://openalex.org/W4307045186","https://openalex.org/W4313130906","https://openalex.org/W4323033563","https://openalex.org/W4365512576","https://openalex.org/W4378465281","https://openalex.org/W4385815442","https://openalex.org/W4386038389","https://openalex.org/W4393942911","https://openalex.org/W4401307507","https://openalex.org/W4414165646"],"related_works":[],"abstract_inverted_index":{"Multimodal":[0,79,107],"foundation":[1,194],"models":[2],"have":[3],"significantly":[4],"improved":[5],"feature":[6,127],"representation":[7,30],"by":[8,83,98],"integrating":[9],"information":[10],"from":[11],"multiple":[12],"modalities,":[13],"making":[14],"them":[15],"highly":[16],"suitable":[17],"for":[18,31,69,115,203],"a":[19,50,66,77,104,164],"broader":[20],"set":[21],"of":[22,27,89,135,192],"applications.":[23],"However,":[24],"the":[25,190,193],"exploration":[26],"multimodal":[28,70],"facial":[29,40,71,144],"understanding":[32],"perception":[33],"has":[34],"been":[35],"limited.":[36],"Understanding":[37],"and":[38,47,52,58,95,119,132,154,178,205],"analyzing":[39],"states,":[41],"such":[42],"as":[43],"Action":[44,92,116],"Units":[45],"(AUs)":[46],"emotions,":[48],"require":[49],"comprehensive":[51,67,125],"robust":[53],"framework":[54],"that":[55,169],"bridges":[56],"visual":[57,126,149],"linguistic":[59],"modalities.":[60],"In":[61],"this":[62],"paper,":[63],"we":[64,75,102,162],"present":[65],"pipeline":[68],"state":[72],"analysis.":[73],"First,":[74],"compile":[76],"new":[78],"Face":[80,108],"Dataset":[81],"(MFA)":[82],"generating":[84],"detailed":[85,143],"multilevel":[86],"language":[87],"descriptions":[88],"face,":[90],"incorporating":[91],"Unit":[93,117],"(AU)":[94,118],"emotion":[96,120,155,206],"descriptions,":[97,156],"leveraging":[99],"GPT-4o.":[100],"Second,":[101],"introduce":[103],"novel":[105],"Multilevel":[106],"Foundation":[109],"model":[110,123,195],"(MF<sup":[111],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[112,173],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">2</sup>)":[113],"tailored":[114],"recognition.":[121],"Our":[122],"incorporates":[124],"modeling":[128],"at":[129],"both":[130],"local":[131],"global":[133],"levels":[134],"face":[136],"image,":[137],"enhancing":[138],"its":[139],"ability":[140],"to":[141,196],"represent":[142],"appearances.":[145],"This":[146,180],"design":[147],"aligns":[148],"representations":[150],"with":[151],"structured":[152],"AU":[153,204],"ensuring":[157],"effective":[158],"cross-modal":[159],"integration.":[160],"Third,":[161],"develop":[163],"Decoupled":[165],"Fine-Tuning":[166],"Network":[167],"(DFN)":[168],"efficiently":[170],"adapts":[171],"MF<sup":[172],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">2</sup>":[174],"across":[175],"various":[176],"tasks":[177],"datasets.":[179],"approach":[181],"not":[182],"only":[183],"reduces":[184],"computational":[185],"overhead":[186],"but":[187],"also":[188],"broadens":[189],"applicability":[191],"diverse":[197],"scenarios.":[198],"Experimentation":[199],"show":[200],"superior":[201],"performance":[202],"detection":[207],"tasks.":[208]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
