{"id":"https://openalex.org/W7163043436","doi":"https://doi.org/10.48550/arxiv.2605.31556","title":"Vision-Language Models Suppress Female Representations Under Ambiguous Input","display_name":"Vision-Language Models Suppress Female Representations Under Ambiguous Input","publication_year":2026,"publication_date":"2026-05-29","ids":{"openalex":"https://openalex.org/W7163043436","doi":"https://doi.org/10.48550/arxiv.2605.31556"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.31556","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.31556","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.31556","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5137589259","display_name":"Arnau Marin-Llobet","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Marin-Llobet, Arnau","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137613278","display_name":"Simon Henniger","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Henniger, Simon","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5015258558","display_name":"Mahzarin R. Banaji","orcid":"https://orcid.org/0000-0002-5941-7455"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Banaji, Mahzarin R.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.24379999935626984,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.24379999935626984,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.17419999837875366,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11094","display_name":"Face Recognition and Perception","score":0.05510000139474869,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/association","display_name":"Association (psychology)","score":0.6072999835014343},{"id":"https://openalex.org/keywords/filter","display_name":"Filter (signal processing)","score":0.5848000049591064},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.5806999802589417},{"id":"https://openalex.org/keywords/encode","display_name":"ENCODE","score":0.5582000017166138},{"id":"https://openalex.org/keywords/measure","display_name":"Measure (data warehouse)","score":0.5324000120162964},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.5029000043869019},{"id":"https://openalex.org/keywords/signal","display_name":"SIGNAL (programming language)","score":0.4772999882698059},{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.4131999909877777}],"concepts":[{"id":"https://openalex.org/C142853389","wikidata":"https://www.wikidata.org/wiki/Q744778","display_name":"Association (psychology)","level":2,"score":0.6072999835014343},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.5848000049591064},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.5806999802589417},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.5582000017166138},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.5324000120162964},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5074999928474426},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.5029000043869019},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.4772999882698059},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.4154999852180481},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.4131999909877777},{"id":"https://openalex.org/C46312422","wikidata":"https://www.wikidata.org/wiki/Q11024","display_name":"Communication","level":1,"score":0.39100000262260437},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3894999921321869},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.384799987077713},{"id":"https://openalex.org/C2778355321","wikidata":"https://www.wikidata.org/wiki/Q17079427","display_name":"Identity (music)","level":2,"score":0.3774000108242035},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.35260000824928284},{"id":"https://openalex.org/C124304363","wikidata":"https://www.wikidata.org/wiki/Q673661","display_name":"Abstraction","level":2,"score":0.33570000529289246},{"id":"https://openalex.org/C143271835","wikidata":"https://www.wikidata.org/wiki/Q254515","display_name":"Similitude","level":2,"score":0.335099995136261},{"id":"https://openalex.org/C2780522230","wikidata":"https://www.wikidata.org/wiki/Q1140419","display_name":"Ambiguity","level":2,"score":0.3181000053882599},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.3084999918937683},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.2996000051498413},{"id":"https://openalex.org/C100515483","wikidata":"https://www.wikidata.org/wiki/Q3268235","display_name":"Filter bank","level":3,"score":0.2903999984264374},{"id":"https://openalex.org/C44725695","wikidata":"https://www.wikidata.org/wiki/Q288156","display_name":"Normative","level":2,"score":0.2754000127315521},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.27140000462532043},{"id":"https://openalex.org/C2776502983","wikidata":"https://www.wikidata.org/wiki/Q690182","display_name":"Contrast (vision)","level":2,"score":0.2702000141143799},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.26899999380111694},{"id":"https://openalex.org/C111370547","wikidata":"https://www.wikidata.org/wiki/Q7451120","display_name":"Sensory cue","level":2,"score":0.26579999923706055}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.31556","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.31556","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"Preprint"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.31556","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.31556","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/5","display_name":"Gender equality","score":0.7577025890350342}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Alignment":[0],"teaches":[1],"vision-language":[2],"models":[3,58,73,121],"(VLMs)":[4],"to":[5,60,96],"avoid":[6],"expressing":[7],"demographic":[8],"biases,":[9],"and":[10,102,111,116,147,153],"when":[11,52],"gender":[12],"is":[13,21,148],"clearly":[14],"visible":[15],"they":[16],"largely":[17],"succeed.":[18],"Far":[19],"less":[20],"known":[22],"about":[23],"ambiguous":[24,54],"inputs":[25],"(a":[26],"worker":[27],"in":[28,38],"full":[29],"gear,":[30],"a":[31,84,124,154],"figure":[32],"seen":[33],"from":[34],"behind)":[35],"cases":[36],"common":[37],"practice":[39],"yet":[40,128],"rarely":[41],"studied.":[42],"We":[43,77],"find":[44],"that":[45,87,158],"minimal":[46],"prompting":[47,53],"pressure":[48],"exposes":[49],"occupation-gender":[50],"defaults":[51],"input":[55],"images,":[56,110],"with":[57],"collapsing":[59],"male":[61,138],"even":[62],"for":[63],"strongly":[64],"female-stereotyped":[65],"occupations.":[66],"But":[67],"do":[68],"these":[69,169],"outputs":[70,117],"reflect":[71],"what":[72],"actually":[74],"encode":[75,123],"internally?":[76],"introduce":[78],"LALS":[79],"(Latent":[80],"Association":[81],"Leaning":[82],"Score),":[83],"zero-shot":[85],"metric":[86],"projects":[88],"visual-token":[89],"activations":[90],"into":[91],"the":[92],"model's":[93],"text-embedding":[94],"space":[95],"measure":[97],"concept":[98],"associations":[99],"per":[100],"token":[101],"layer.":[103],"Across":[104],"15":[105],"occupations,":[106],"over":[107],"800":[108],"gender-ambiguous":[109],"four":[112],"VLMs,":[113],"internal":[114,170],"representations":[115],"are":[118],"systematically":[119],"decoupled:":[120],"often":[122],"female":[125,143],"association":[126],"internally":[127],"output":[129],"male.":[130],"Layer-wise":[131],"analysis":[132],"reveals":[133],"an":[134],"asymmetric":[135],"filter":[136],"--":[137,152],"signal":[139,144],"amplifies":[140],"end-to-end":[141],"while":[142],"peaks":[145],"mid-network":[146],"suppressed":[149],"before":[150],"generation":[151],"color":[155,166],"ablation":[156],"shows":[157],"culturally":[159],"loaded":[160],"visual":[161],"cues":[162],"such":[163],"as":[164],"clothing":[165],"further":[167],"modulate":[168],"associations.":[171]},"counts_by_year":[],"updated_date":"2026-07-01T06:00:48.157686","created_date":"2026-06-02T00:00:00"}
