{"id":"https://openalex.org/W4403713326","doi":"https://doi.org/10.1145/3689062.3689083","title":"Larger Encoders, Smaller Regressors: Exploring Label Dimensionality Reduction and Multimodal Large Language Models as Feature Extractors for Predicting Social Perception","display_name":"Larger Encoders, Smaller Regressors: Exploring Label Dimensionality Reduction and Multimodal Large Language Models as Feature Extractors for Predicting Social Perception","publication_year":2024,"publication_date":"2024-10-23","ids":{"openalex":"https://openalex.org/W4403713326","doi":"https://doi.org/10.1145/3689062.3689083"},"language":"en","primary_location":{"id":"doi:10.1145/3689062.3689083","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3689062.3689083","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 5th on Multimodal Sentiment Analysis Challenge and Workshop: Social Perception and Humor","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://oa.upm.es/84579/1/10259692.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5093620465","display_name":"Iv\u00e1n Mart\u00edn-Fern\u00e1ndez","orcid":"https://orcid.org/0009-0004-2769-9752"},"institutions":[{"id":"https://openalex.org/I88060688","display_name":"Universidad Polit\u00e9cnica de Madrid","ror":"https://ror.org/03n6nwv02","country_code":"ES","type":"education","lineage":["https://openalex.org/I88060688"]}],"countries":["ES"],"is_corresponding":true,"raw_author_name":"Iv\u00e1n Mart\u00edn-Fern\u00e1ndez","raw_affiliation_strings":["THAU Group, IPTC, Universidad Polit\u00e9cnica de Madrid, Madrid, Spain"],"raw_orcid":"https://orcid.org/0009-0004-2769-9752","affiliations":[{"raw_affiliation_string":"THAU Group, IPTC, Universidad Polit\u00e9cnica de Madrid, Madrid, Spain","institution_ids":["https://openalex.org/I88060688"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5094034370","display_name":"Sergio Esteban-Romero","orcid":"https://orcid.org/0009-0008-6336-7877"},"institutions":[{"id":"https://openalex.org/I88060688","display_name":"Universidad Polit\u00e9cnica de Madrid","ror":"https://ror.org/03n6nwv02","country_code":"ES","type":"education","lineage":["https://openalex.org/I88060688"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Sergio Esteban-Romero","raw_affiliation_strings":["THAU Group, IPTC, Universidad Polit\u00e9cnica de Madrid, Madrid, Spain"],"raw_orcid":"https://orcid.org/0009-0008-6336-7877","affiliations":[{"raw_affiliation_string":"THAU Group, IPTC, Universidad Polit\u00e9cnica de Madrid, Madrid, Spain","institution_ids":["https://openalex.org/I88060688"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114389148","display_name":"Jaime Bellver-Soler","orcid":null},"institutions":[{"id":"https://openalex.org/I88060688","display_name":"Universidad Polit\u00e9cnica de Madrid","ror":"https://ror.org/03n6nwv02","country_code":"ES","type":"education","lineage":["https://openalex.org/I88060688"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Jaime Bellver-Soler","raw_affiliation_strings":["THAU Group, IPTC, Universidad Polit\u00e9cnica de Madrid, Madrid, Spain"],"raw_orcid":"https://orcid.org/0009-0006-7973-4913","affiliations":[{"raw_affiliation_string":"THAU Group, IPTC, Universidad Polit\u00e9cnica de Madrid, Madrid, Spain","institution_ids":["https://openalex.org/I88060688"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032875587","display_name":"Fernando Fern\u00e1ndez-Mart\u00ednez","orcid":"https://orcid.org/0000-0003-3877-0089"},"institutions":[{"id":"https://openalex.org/I88060688","display_name":"Universidad Polit\u00e9cnica de Madrid","ror":"https://ror.org/03n6nwv02","country_code":"ES","type":"education","lineage":["https://openalex.org/I88060688"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Fernando Fern\u00e1ndez-Mart\u00ednez","raw_affiliation_strings":["THAU Group, IPTC, Universidad Polit\u00e9cnica de Madrid, Madrid, Spain"],"raw_orcid":"https://orcid.org/0000-0003-3877-0089","affiliations":[{"raw_affiliation_string":"THAU Group, IPTC, Universidad Polit\u00e9cnica de Madrid, Madrid, Spain","institution_ids":["https://openalex.org/I88060688"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5058484271","display_name":"Manuel Gil-Mart\u00edn","orcid":"https://orcid.org/0000-0002-4285-6224"},"institutions":[{"id":"https://openalex.org/I88060688","display_name":"Universidad Polit\u00e9cnica de Madrid","ror":"https://ror.org/03n6nwv02","country_code":"ES","type":"education","lineage":["https://openalex.org/I88060688"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Manuel Gil-Mart\u00edn","raw_affiliation_strings":["THAU Group, IPTC, Universidad Polit\u00e9cnica de Madrid, Madrid, Spain"],"raw_orcid":"https://orcid.org/0000-0002-4285-6224","affiliations":[{"raw_affiliation_string":"THAU Group, IPTC, Universidad Polit\u00e9cnica de Madrid, Madrid, Spain","institution_ids":["https://openalex.org/I88060688"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5093620465"],"corresponding_institution_ids":["https://openalex.org/I88060688"],"apc_list":null,"apc_paid":null,"fwci":1.6557,"has_fulltext":true,"cited_by_count":5,"citation_normalized_percentile":{"value":0.86777684,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"20","last_page":"27"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9940000176429749,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9940000176429749,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10664","display_name":"Sentiment Analysis and Opinion Mining","score":0.9919000267982483,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.9736999869346619,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/dimensionality-reduction","display_name":"Dimensionality reduction","score":0.7868428230285645},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.6999426484107971},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6951296925544739},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.6386033296585083},{"id":"https://openalex.org/keywords/reduction","display_name":"Reduction (mathematics)","score":0.6006299257278442},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.5855124592781067},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5210384726524353},{"id":"https://openalex.org/keywords/curse-of-dimensionality","display_name":"Curse of dimensionality","score":0.5080623626708984},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4276021122932434},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4255502223968506},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3748537302017212},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3630620539188385},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.20186761021614075},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.14322733879089355},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.1259966492652893}],"concepts":[{"id":"https://openalex.org/C70518039","wikidata":"https://www.wikidata.org/wiki/Q16000077","display_name":"Dimensionality reduction","level":2,"score":0.7868428230285645},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.6999426484107971},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6951296925544739},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.6386033296585083},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.6006299257278442},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5855124592781067},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5210384726524353},{"id":"https://openalex.org/C111030470","wikidata":"https://www.wikidata.org/wiki/Q1430460","display_name":"Curse of dimensionality","level":2,"score":0.5080623626708984},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4276021122932434},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4255502223968506},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3748537302017212},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3630620539188385},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.20186761021614075},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.14322733879089355},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.1259966492652893},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3689062.3689083","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3689062.3689083","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 5th on Multimodal Sentiment Analysis Challenge and Workshop: Social Perception and Humor","raw_type":"proceedings-article"},{"id":"pmh:oai:oa.upm.es:84579","is_oa":true,"landing_page_url":"https://oa.upm.es/84579/","pdf_url":"https://oa.upm.es/84579/1/10259692.pdf","source":{"id":"https://openalex.org/S4377196323","display_name":"UPM Digital Archive (Technical University of Madrid)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I88060688","host_organization_name":"Universidad Polit\u00e9cnica de Madrid","host_organization_lineage":["https://openalex.org/I88060688"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Proceedings of the 5th on Multimodal Sentiment Analysis Challenge and Workshop: Social Perception and Humor | MuSe'24 | 28/10/2024-01/11/2024 | Melbourne, Australia","raw_type":"info:eu-repo/semantics/acceptedVersion"}],"best_oa_location":{"id":"pmh:oai:oa.upm.es:84579","is_oa":true,"landing_page_url":"https://oa.upm.es/84579/","pdf_url":"https://oa.upm.es/84579/1/10259692.pdf","source":{"id":"https://openalex.org/S4377196323","display_name":"UPM Digital Archive (Technical University of Madrid)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I88060688","host_organization_name":"Universidad Polit\u00e9cnica de Madrid","host_organization_lineage":["https://openalex.org/I88060688"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Proceedings of the 5th on Multimodal Sentiment Analysis Challenge and Workshop: Social Perception and Humor | MuSe'24 | 28/10/2024-01/11/2024 | Melbourne, Australia","raw_type":"info:eu-repo/semantics/acceptedVersion"},"sustainable_development_goals":[{"score":0.7599999904632568,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"}],"awards":[{"id":"https://openalex.org/G5608557609","display_name":null,"funder_award_id":"101071191 ? HORIZON-EIC-2021-PATHFINDERCHALLENGES-01","funder_id":"https://openalex.org/F4320323817","funder_display_name":"Universitas Brawijaya"}],"funders":[{"id":"https://openalex.org/F4320323817","display_name":"Universitas Brawijaya","ror":"https://ror.org/01wk3d929"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4403713326.pdf","grobid_xml":"https://content.openalex.org/works/W4403713326.grobid-xml"},"referenced_works_count":7,"referenced_works":["https://openalex.org/W591589697","https://openalex.org/W2064652897","https://openalex.org/W2120349889","https://openalex.org/W3213879871","https://openalex.org/W4361994820","https://openalex.org/W4386161521","https://openalex.org/W4400615682"],"related_works":["https://openalex.org/W1995622179","https://openalex.org/W1484111231","https://openalex.org/W4391160746","https://openalex.org/W1552543208","https://openalex.org/W2074396517","https://openalex.org/W2166963679","https://openalex.org/W2187269125","https://openalex.org/W1641615907","https://openalex.org/W3089231081","https://openalex.org/W2093956241"],"abstract_inverted_index":{"Designing":[0],"reliable":[1],"automatic":[2],"models":[3,157],"for":[4,63,92,100,167,184,234,279],"social":[5,28,95],"perception":[6],"can":[7,120,247],"contribute":[8],"to":[9,36,97,123,144,212,268,274],"a":[10,88,101,128,160,174,221,243,266],"better":[11],"understanding":[12],"of":[13,40,57,73,103,170,178,190,225,231,259],"human":[14],"behavior,":[15],"enabling":[16],"more":[17],"trustworthy":[18],"experiences":[19],"in":[20,66,106,238,270],"the":[21,37,55,67,74,124,153,164,171,181,196,200,229,276,280],"multimedia":[22],"on-line":[23],"communication":[24],"environment.":[25],"However,":[26],"predicting":[27],"attributes":[29,96],"from":[30,152],"video":[31,108],"data":[32],"remains":[33],"challenging":[34],"due":[35],"complex":[38],"interplay":[39],"visual,":[41],"auditory,":[42],"and":[43,155,158,203],"linguistic":[44],"cues.":[45],"In":[46,241],"this":[47,51,239,263],"paper,":[48],"we":[49,140],"address":[50],"challenge":[52],"by":[53,133,194],"investigating":[54],"effectiveness":[56],"Multimodal":[58],"Large":[59],"Language":[60],"Models":[61],"(MM-LLMs)":[62],"feature":[64,138,218],"extraction":[65],"MuSe-Perception":[68],"challenge.":[69],"Firstly,":[70],"our":[71,226],"analysis":[72,224],"novel":[75],"LMU-ELP":[76],"dataset":[77],"has":[78],"revealed":[79],"high":[80],"correlations":[81],"between":[82],"certain":[83],"perceptual":[84],"dimensions,":[85],"motivating":[86],"using":[87,180],"single":[89],"regression":[90,236,277],"model":[91],"all":[93],"16":[94],"be":[98,121,248],"predicted":[99],"set":[102],"speakers":[104],"appearing":[105],"recorded":[107],"clips.":[109],"We":[110,148,261],"demonstrate":[111],"that":[112],"dimensionality":[113],"reduction":[114],"through":[115],"Principal":[116],"Component":[117],"Analysis":[118],"(PCA)":[119],"applied":[122],"label":[125],"space":[126],"without":[127],"relevant":[129],"performance":[130,237,278],"loss.":[131],"Secondly,":[132],"employing":[134],"frozen":[135],"MM-LLMs":[136,216],"as":[137,217,265],"extractors,":[139],"explore":[141],"their":[142],"ability":[143],"capture":[145],"perception-related":[146],"information.":[147],"extract":[149],"sequence":[150],"embeddings":[151],"Qwen-VL":[154],"Qwen-Audio":[156],"train":[159],"Multi-Layer":[161],"Perceptron":[162],"over":[163,199],"attention-pooled":[165],"vectors":[166],"each":[168],"one":[169],"encoders,":[172],"obtaining":[173],"mean":[175],"Pearson":[176,232,245],"correlation":[177,233],"0.22":[179],"average":[182],"predictions":[183],"both":[185],"models.":[186],"Our":[187],"best":[188],"result":[189,264],"0.31":[191],"is":[192],"achieved":[193],"training":[195],"same":[197],"architecture":[198],"baseline":[201],"vit-ver":[202],"w2v-msp":[204],"features,":[205],"which":[206],"motivates":[207],"further":[208],"exploration":[209],"on":[210],"how":[211],"effectively":[213],"leverage":[214],"advanced":[215],"extractors.":[219],"Lastly,":[220],"post":[222],"hoc":[223],"results":[227],"highlights":[228],"limitations":[230],"evaluating":[235],"context.":[240],"particular,":[242],"similar":[244],"coefficient":[246],"obtained":[249],"with":[250],"two":[251],"very":[252],"different":[253,257],"prediction":[254],"sets":[255],"displaying":[256],"levels":[258],"variability.":[260],"take":[262],"call":[267],"action":[269],"exploring":[271],"alternative":[272],"metrics":[273],"assess":[275],"task.":[281]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1}],"updated_date":"2025-12-27T23:08:20.325037","created_date":"2025-10-10T00:00:00"}
