{"id":"https://openalex.org/W4400374599","doi":"https://doi.org/10.48550/arxiv.2407.02477","title":"Understanding Alignment in Multimodal LLMs: A Comprehensive Study","display_name":"Understanding Alignment in Multimodal LLMs: A Comprehensive Study","publication_year":2024,"publication_date":"2024-07-02","ids":{"openalex":"https://openalex.org/W4400374599","doi":"https://doi.org/10.48550/arxiv.2407.02477"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2407.02477","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2407.02477","pdf_url":"https://arxiv.org/pdf/2407.02477","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2407.02477","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100029860","display_name":"Elmira Amirloo","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Amirloo, Elmira","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041899005","display_name":"Jean-Philippe Fauconnier","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fauconnier, Jean-Philippe","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100029861","display_name":"Christoph Roesmann","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Roesmann, Christoph","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100029862","display_name":"Christian Kerl","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kerl, Christian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100029863","display_name":"Rinu Boney","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Boney, Rinu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100029864","display_name":"Yusu Qian","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qian, Yusu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100687845","display_name":"Zirui Wang","orcid":"https://orcid.org/0000-0002-5927-6322"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Zirui","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100029865","display_name":"Afshin Dehghan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dehghan, Afshin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083263708","display_name":"Yinfei Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Yinfei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066666034","display_name":"Zhe Gan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gan, Zhe","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5094170434","display_name":"Peter Grasch","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Grasch, Peter","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":11,"corresponding_author_ids":["https://openalex.org/A5100029860"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9954000115394592,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9954000115394592,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12881","display_name":"linguistics and terminology studies","score":0.9829999804496765,"subfield":{"id":"https://openalex.org/subfields/1203","display_name":"Language and Linguistics"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10759","display_name":"Translation Studies and Practices","score":0.9768000245094299,"subfield":{"id":"https://openalex.org/subfields/1203","display_name":"Language and Linguistics"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/political-science","display_name":"Political science","score":0.3327797055244446}],"concepts":[{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.3327797055244446}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2407.02477","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2407.02477","pdf_url":"https://arxiv.org/pdf/2407.02477","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2407.02477","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2407.02477","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2407.02477","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2407.02477","pdf_url":"https://arxiv.org/pdf/2407.02477","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4400374599.pdf","grobid_xml":"https://content.openalex.org/works/W4400374599.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2949263084","https://openalex.org/W2743539335","https://openalex.org/W594353338","https://openalex.org/W2922049016","https://openalex.org/W4390697879","https://openalex.org/W2070214669","https://openalex.org/W2724734218","https://openalex.org/W4382466601"],"abstract_inverted_index":{"Preference":[0,100],"alignment":[1,68,96,118,146,154,244],"has":[2],"become":[3],"a":[4,188,212,250],"crucial":[5],"component":[6],"in":[7,19,112,133,147,183],"enhancing":[8],"the":[9,61,130,153,178,181,198],"performance":[10,179,240],"of":[11,67,144,180,190,200,215,252],"Large":[12,21],"Language":[13,22],"Models":[14,23],"(LLMs),":[15],"yet":[16],"its":[17],"impact":[18,203],"Multimodal":[20],"(MLLMs)":[24],"remains":[25,121],"comparatively":[26],"underexplored.":[27],"Similar":[28],"to":[29,72,76,110,129,241],"language":[30],"models,":[31,232],"MLLMs":[32,70,92],"for":[33,69,91,246],"image":[34,62,82],"understanding":[35],"tasks":[36],"encounter":[37],"challenges":[38],"like":[39],"hallucination.":[40],"In":[41,136],"MLLMs,":[42],"hallucination":[43],"can":[44,176,237],"occur":[45],"not":[46],"only":[47],"by":[48,54,151],"stating":[49],"incorrect":[50],"facts":[51],"but":[52],"also":[53],"producing":[55],"responses":[56,78],"that":[57,170,225,235],"are":[58],"inconsistent":[59],"with":[60,81],"content.":[63],"A":[64],"primary":[65],"objective":[66],"is":[71],"encourage":[73],"these":[74,134,208],"models":[75,248],"align":[77],"more":[79],"closely":[80],"information.":[83],"Recently,":[84],"multiple":[85],"works":[86],"have":[87],"introduced":[88],"preference":[89,145,193,218],"datasets":[90,194],"and":[93,103,117,163,168,173,195,233],"examined":[94],"different":[95],"methods,":[97,119],"including":[98],"Direct":[99],"Optimization":[101,106],"(DPO)":[102],"Proximal":[104],"Policy":[105],"(PPO).":[107],"However,":[108],"due":[109],"variations":[111],"datasets,":[113],"base":[114],"model":[115,182,204],"types,":[116],"it":[120,236],"unclear":[122],"which":[123],"specific":[124],"elements":[125],"contribute":[126],"most":[127],"significantly":[128],"reported":[131],"improvements":[132],"works.":[135],"this":[137],"paper,":[138],"we":[139,210],"independently":[140],"analyze":[141],"each":[142],"aspect":[143],"MLLMs.":[148],"We":[149,186],"start":[150],"categorizing":[152],"algorithms":[155],"into":[156],"two":[157],"groups,":[158],"offline":[159,172],"(such":[160,165],"as":[161,166],"DPO),":[162],"online":[164,174],"online-DPO),":[167],"show":[169,234],"combining":[171],"methods":[175],"improve":[177],"certain":[184],"scenarios.":[185],"review":[187],"variety":[189],"published":[191,243],"multimodal":[192,217,247],"discuss":[196],"how":[197],"details":[199],"their":[201],"construction":[202],"performance.":[205],"Based":[206],"on":[207],"insights,":[209],"introduce":[211],"novel":[213],"way":[214],"creating":[216],"data":[219],"called":[220],"Bias-Driven":[221],"Hallucination":[222],"Sampling":[223],"(BDHS)":[224],"needs":[226],"neither":[227],"additional":[228],"annotation":[229],"nor":[230],"external":[231],"achieve":[238],"competitive":[239],"previously":[242],"work":[245],"across":[249],"range":[251],"benchmarks.":[253]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-08T08:50:53.379069","created_date":"2025-10-10T00:00:00"}
