{"id":"https://openalex.org/W4210405389","doi":"https://doi.org/10.1109/fg52635.2021.9667030","title":"Multi-Modal Learning for AU Detection Based on Multi-Head Fused Transformers","display_name":"Multi-Modal Learning for AU Detection Based on Multi-Head Fused Transformers","publication_year":2021,"publication_date":"2021-12-15","ids":{"openalex":"https://openalex.org/W4210405389","doi":"https://doi.org/10.1109/fg52635.2021.9667030"},"language":"en","primary_location":{"id":"doi:10.1109/fg52635.2021.9667030","is_oa":false,"landing_page_url":"https://doi.org/10.1109/fg52635.2021.9667030","pdf_url":null,"source":{"id":"https://openalex.org/S4363608446","display_name":"2021 16th IEEE International Conference on Automatic Face and Gesture Recognition (FG 2021)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 16th IEEE International Conference on Automatic Face and Gesture Recognition (FG 2021)","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2203.11441","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100368929","display_name":"Xiang Zhang","orcid":"https://orcid.org/0009-0001-9312-1762"},"institutions":[{"id":"https://openalex.org/I123946342","display_name":"Binghamton University","ror":"https://ror.org/008rmbt77","country_code":"US","type":"education","lineage":["https://openalex.org/I123946342"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Xiang Zhang","raw_affiliation_strings":["State University of New York,Department of Computer Science,Binghamton,NY,USA","Department of Computer Science, State University of New York, Binghamton, NY, USA"],"affiliations":[{"raw_affiliation_string":"State University of New York,Department of Computer Science,Binghamton,NY,USA","institution_ids":["https://openalex.org/I123946342"]},{"raw_affiliation_string":"Department of Computer Science, State University of New York, Binghamton, NY, USA","institution_ids":["https://openalex.org/I123946342"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100411653","display_name":"Lijun Yin","orcid":"https://orcid.org/0000-0002-0343-7190"},"institutions":[{"id":"https://openalex.org/I123946342","display_name":"Binghamton University","ror":"https://ror.org/008rmbt77","country_code":"US","type":"education","lineage":["https://openalex.org/I123946342"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Lijun Yin","raw_affiliation_strings":["State University of New York,Department of Computer Science,Binghamton,NY,USA","Department of Computer Science, State University of New York, Binghamton, NY, USA"],"affiliations":[{"raw_affiliation_string":"State University of New York,Department of Computer Science,Binghamton,NY,USA","institution_ids":["https://openalex.org/I123946342"]},{"raw_affiliation_string":"Department of Computer Science, State University of New York, Binghamton, NY, USA","institution_ids":["https://openalex.org/I123946342"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5100368929"],"corresponding_institution_ids":["https://openalex.org/I123946342"],"apc_list":null,"apc_paid":null,"fwci":1.2046,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.8654105,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9955999851226807,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9955999851226807,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11398","display_name":"Hand Gesture Recognition Systems","score":0.9919999837875366,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9908000230789185,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.7622218728065491},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7553809881210327},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.7226604223251343},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.6335112452507019},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5979864597320557},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.4456065893173218},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.4356154501438141},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.42674127221107483},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.37052005529403687},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.1471005082130432},{"id":"https://openalex.org/keywords/voltage","display_name":"Voltage","score":0.07333594560623169}],"concepts":[{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.7622218728065491},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7553809881210327},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.7226604223251343},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.6335112452507019},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5979864597320557},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.4456065893173218},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.4356154501438141},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.42674127221107483},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.37052005529403687},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.1471005082130432},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.07333594560623169},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/fg52635.2021.9667030","is_oa":false,"landing_page_url":"https://doi.org/10.1109/fg52635.2021.9667030","pdf_url":null,"source":{"id":"https://openalex.org/S4363608446","display_name":"2021 16th IEEE International Conference on Automatic Face and Gesture Recognition (FG 2021)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 16th IEEE International Conference on Automatic Face and Gesture Recognition (FG 2021)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2203.11441","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2203.11441","pdf_url":"https://arxiv.org/pdf/2203.11441","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2203.11441","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2203.11441","pdf_url":"https://arxiv.org/pdf/2203.11441","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.4000000059604645,"id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G94799126","display_name":null,"funder_award_id":"CNS-1629898","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":55,"referenced_works":["https://openalex.org/W1040410175","https://openalex.org/W1480583224","https://openalex.org/W1595126664","https://openalex.org/W1938551397","https://openalex.org/W2034069713","https://openalex.org/W2051297709","https://openalex.org/W2163605009","https://openalex.org/W2194775991","https://openalex.org/W2421475762","https://openalex.org/W2470957930","https://openalex.org/W2589142773","https://openalex.org/W2619383789","https://openalex.org/W2621864722","https://openalex.org/W2767290858","https://openalex.org/W2789992948","https://openalex.org/W2816936748","https://openalex.org/W2893915321","https://openalex.org/W2896457183","https://openalex.org/W2903991757","https://openalex.org/W2904106524","https://openalex.org/W2922226605","https://openalex.org/W2949868867","https://openalex.org/W2961770861","https://openalex.org/W2963192057","https://openalex.org/W2963890275","https://openalex.org/W2964007075","https://openalex.org/W2964051877","https://openalex.org/W2964185501","https://openalex.org/W2967177252","https://openalex.org/W2969059826","https://openalex.org/W2981677410","https://openalex.org/W3035022492","https://openalex.org/W3035574168","https://openalex.org/W3092462694","https://openalex.org/W3094502228","https://openalex.org/W3096609285","https://openalex.org/W3097616280","https://openalex.org/W3101614002","https://openalex.org/W3172863135","https://openalex.org/W3176327573","https://openalex.org/W4288089799","https://openalex.org/W4292779060","https://openalex.org/W4385245566","https://openalex.org/W4394666973","https://openalex.org/W6640261410","https://openalex.org/W6684191040","https://openalex.org/W6739901393","https://openalex.org/W6750298367","https://openalex.org/W6754835258","https://openalex.org/W6755207826","https://openalex.org/W6769627184","https://openalex.org/W6778485988","https://openalex.org/W6778883912","https://openalex.org/W6780226713","https://openalex.org/W6784094891"],"related_works":["https://openalex.org/W2185469136","https://openalex.org/W2011264131","https://openalex.org/W4306353150","https://openalex.org/W2026860389","https://openalex.org/W8219677","https://openalex.org/W3216879894","https://openalex.org/W2890132085","https://openalex.org/W2168054807","https://openalex.org/W4301143707","https://openalex.org/W2952745240"],"abstract_inverted_index":{"Multi-modal":[0],"learning":[1,31],"has":[2,94],"been":[3,95],"intensified":[4],"in":[5,11,25,51,98,111,156],"recent":[6],"years,":[7],"especially":[8],"for":[9,32,38,56,128,161],"applications":[10],"facial":[12],"analysis":[13],"and":[14,34,102,143,179,181,190],"action":[15],"unit":[16],"detection":[17,113,200],"whilst":[18],"there":[19,41],"still":[20],"exist":[21],"two":[22,173],"main":[23],"challenges":[24],"terms":[26],"of":[27,45,61,67,78,165,198],"1)":[28],"relevant":[29],"feature":[30],"representation":[33,136],"2)":[35],"efficient":[36,91],"fusion":[37,148,152,158,164],"multi-modalities.":[39],"Recently,":[40],"are":[42,63,184],"a":[43,89,120],"number":[44],"works":[46],"have":[47],"shown":[48],"the":[49,53,65,82,85,157,162,182,187,196],"effectiveness":[50],"utilizing":[52],"attention":[54,75,153],"mechanism":[55],"AU":[57,112,129,133,176,199],"detection,":[58,130],"however,":[59],"most":[60],"them":[62],"binding":[64],"region":[66],"interest":[68],"(ROI)":[69],"with":[70],"features":[71,77,135],"but":[72,106],"rarely":[73],"apply":[74],"between":[76],"each":[79],"AU.":[80],"On":[81],"other":[83],"hand,":[84],"transformer,":[86],"which":[87,131],"utilizes":[88],"more":[90],"self-attention":[92],"mechanism,":[93],"widely":[96],"used":[97],"natural":[99],"language":[100],"processing":[101],"computer":[103],"vision":[104],"tasks":[105],"is":[107,154,170],"not":[108],"fully":[109],"explored":[110],"tasks.":[114],"In":[115],"this":[116],"paper,":[117],"we":[118],"propose":[119],"novel":[121],"end-to-end":[122],"Multi-Head":[123],"Fused":[124],"Transformer":[125],"(MFT)":[126],"method":[127],"learns":[132],"encoding":[134],"from":[137,201],"different":[138,202],"modalities":[139,145],"by":[140,146],"transformer":[141,149,159],"encoder":[142],"fuses":[144],"another":[147],"module.":[150],"Multi-head":[151],"designed":[155],"module":[160],"effective":[163],"multiple":[166],"modalities.":[167,203],"Our":[168],"approach":[169],"evaluated":[171],"on":[172],"public":[174],"multi-modal":[175],"databases,":[177],"BP4D,":[178],"BP4D+,":[180],"results":[183],"superior":[185],"to":[186],"state-of-the-art":[188],"algorithms":[189],"baseline":[191],"models.":[192],"We":[193],"further":[194],"analyze":[195],"performance":[197]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":4}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2022-02-08T00:00:00"}
