{"id":"https://openalex.org/W4407633293","doi":"https://doi.org/10.48550/arxiv.2502.10391","title":"MM-RLHF: The Next Step Forward in Multimodal LLM Alignment","display_name":"MM-RLHF: The Next Step Forward in Multimodal LLM Alignment","publication_year":2025,"publication_date":"2025-02-14","ids":{"openalex":"https://openalex.org/W4407633293","doi":"https://doi.org/10.48550/arxiv.2502.10391"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2502.10391","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2502.10391","pdf_url":"https://arxiv.org/pdf/2502.10391","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2502.10391","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100376960","display_name":"Yifan Zhang","orcid":"https://orcid.org/0000-0003-1289-2192"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zhang, Yi-Fan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103652301","display_name":"Tao Yu","orcid":"https://orcid.org/0000-0003-2523-4404"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Tao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046473687","display_name":"Haochen Tian","orcid":"https://orcid.org/0000-0002-2650-4915"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tian, Haochen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014172220","display_name":"Chaoyou Fu","orcid":"https://orcid.org/0000-0002-0079-7668"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fu, Chaoyou","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101867544","display_name":"Peiyan Li","orcid":"https://orcid.org/0000-0001-8440-0674"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Peiyan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100589768","display_name":"Jianshu Zeng","orcid":"https://orcid.org/0009-0005-5326-4442"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zeng, Jianshu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107746266","display_name":"Wulin Xie","orcid":"https://orcid.org/0009-0000-9535-4116"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xie, Wulin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101908671","display_name":"Yang Shi","orcid":"https://orcid.org/0000-0002-9579-9403"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shi, Yang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101468621","display_name":"Huanyu Zhang","orcid":"https://orcid.org/0000-0001-6480-2510"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Huanyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054081824","display_name":"Jianjun Wu","orcid":"https://orcid.org/0000-0003-0277-2189"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Junkang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100422551","display_name":"Xue Wang","orcid":"https://orcid.org/0000-0001-8950-4501"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Xue","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074473139","display_name":"Yibo Hu","orcid":"https://orcid.org/0000-0003-3409-3164"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hu, Yibo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101739645","display_name":"Bin Wen","orcid":"https://orcid.org/0000-0003-3449-5841"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wen, Bin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115602252","display_name":"Fan Yang","orcid":"https://orcid.org/0000-0002-8111-4880"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Fan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081038641","display_name":"Zhang Zhang","orcid":"https://orcid.org/0000-0002-7369-4539"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Zhang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112381636","display_name":"Tingting Gao","orcid":"https://orcid.org/0000-0003-2765-4963"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gao, Tingting","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101781790","display_name":"Di Zhang","orcid":"https://orcid.org/0000-0003-0237-2361"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Di","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115602506","display_name":"Liang Wang","orcid":"https://orcid.org/0000-0001-5224-8647"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Liang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109083484","display_name":"Rong Jin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jin, Rong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5111885963","display_name":"Tieniu Tan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tan, Tieniu","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":20,"corresponding_author_ids":["https://openalex.org/A5100376960"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9017000198364258,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.47323617339134216},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3416309952735901}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.47323617339134216},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3416309952735901}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2502.10391","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2502.10391","pdf_url":"https://arxiv.org/pdf/2502.10391","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2502.10391","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2502.10391","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2502.10391","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2502.10391","pdf_url":"https://arxiv.org/pdf/2502.10391","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Despite":[0],"notable":[1],"advancements":[2],"in":[3,31,187,204,211],"Multimodal":[4],"Large":[5],"Language":[6],"Models":[7],"(MLLMs),":[8],"most":[9],"state-of-the-art":[10],"models":[11,44,103],"have":[12,214],"not":[13],"undergone":[14],"thorough":[15],"alignment":[16,25,108,197],"with":[17,45,180,193],"human":[18,46],"preferences.":[19],"This":[20,71],"gap":[21],"exists":[22],"because":[23],"current":[24],"research":[26],"has":[27],"primarily":[28],"achieved":[29],"progress":[30],"specific":[32],"areas":[33],"(e.g.,":[34],"hallucination":[35],"reduction),":[36],"while":[37],"the":[38,99,105,149,157,162,216],"broader":[39],"question":[40],"of":[41,101,107,120,152,164],"whether":[42],"aligning":[43],"preferences":[47],"can":[48],"systematically":[49],"enhance":[50],"MLLM":[51],"capability":[52],"remains":[53],"largely":[54],"unexplored.":[55],"To":[56],"this":[57,89],"end,":[58],"we":[59,91,111,140],"introduce":[60,112],"MM-RLHF,":[61],"a":[62,74,113,145,201,208],"dataset":[63,72],"containing":[64],"$\\mathbf{120k}$":[65],"fine-grained,":[66],"human-annotated":[67],"preference":[68,217],"comparison":[69,166],"pairs.":[70,167],"represents":[73],"substantial":[75],"advancement":[76],"over":[77],"existing":[78],"resources,":[79],"offering":[80,126],"superior":[81],"size,":[82],"diversity,":[83],"annotation":[84],"granularity,":[85],"and":[86,104,129,177,184,195,207,222,230],"quality.":[87],"Leveraging":[88],"dataset,":[90,218],"propose":[92,141],"several":[93],"key":[94],"innovations":[95],"to":[96,134,156,200],"improve":[97],"both":[98],"quality":[100],"reward":[102,137,158,219,228],"efficiency":[106],"algorithms.":[109],"Notably,":[110],"Critique-Based":[114],"Reward":[115,143],"Model,":[116],"which":[117],"generates":[118],"critiques":[119],"model":[121,188],"outputs":[122],"before":[123],"assigning":[124],"scores,":[125],"enhanced":[127],"interpretability":[128],"more":[130,234],"informative":[131],"feedback":[132],"compared":[133],"traditional":[135],"scalar":[136],"mechanisms.":[138],"Additionally,":[139],"Dynamic":[142],"Scaling,":[144],"method":[146],"that":[147],"adjusts":[148],"loss":[150],"weight":[151],"each":[153],"sample":[154],"according":[155],"signal,":[159],"thereby":[160],"optimizing":[161],"use":[163],"high-quality":[165],"Our":[168],"approach":[169],"is":[170],"rigorously":[171],"evaluated":[172],"across":[173],"$\\mathbf{10}$":[174],"distinct":[175],"dimensions":[176],"$\\mathbf{27}$":[178],"benchmarks,":[179],"results":[181],"demonstrating":[182],"significant":[183],"consistent":[185],"improvements":[186],"performance.":[189],"Specifically,":[190],"fine-tuning":[191],"LLaVA-ov-7B":[192],"MM-RLHF":[194],"our":[196,238],"algorithm":[198],"leads":[199],"$\\mathbf{19.5}$%":[202],"increase":[203],"conversational":[205],"abilities":[206],"$\\mathbf{60}$%":[209],"improvement":[210],"safety.":[212],"We":[213],"open-sourced":[215],"model,":[220],"training":[221],"evaluation":[223],"code,":[224],"as":[225,227],"well":[226],"modeling":[229],"safety":[231],"benchmarks.":[232],"For":[233],"details,":[235],"please":[236],"visit":[237],"project":[239],"page:":[240],"https://mm-rlhf.github.io.":[241]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
