{"id":"https://openalex.org/W4412887861","doi":"https://doi.org/10.18653/v1/2025.findings-acl.1112","title":"MPBench: A Comprehensive Multimodal Reasoning Benchmark for Process Errors Identification","display_name":"MPBench: A Comprehensive Multimodal Reasoning Benchmark for Process Errors Identification","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4412887861","doi":"https://doi.org/10.18653/v1/2025.findings-acl.1112"},"language":"en","primary_location":{"id":"doi:10.18653/v1/2025.findings-acl.1112","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-acl.1112","pdf_url":"https://aclanthology.org/2025.findings-acl.1112.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: ACL 2025","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2025.findings-acl.1112.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100606628","display_name":"Xin Pan","orcid":"https://orcid.org/0000-0001-5326-4323"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"xu Zhao Pan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101569327","display_name":"Pengfei Zhou","orcid":"https://orcid.org/0000-0001-6395-8708"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pengfei Zhou","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038964194","display_name":"Jingjing Ai","orcid":"https://orcid.org/0009-0005-1780-8553"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiaxin Ai","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081255558","display_name":"Wangbo Zhao","orcid":"https://orcid.org/0000-0001-9545-7991"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wangbo Zhao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115597295","display_name":"Kai Wang","orcid":"https://orcid.org/0009-0004-6559-2585"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kai Wang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033413473","display_name":"Xiaojiang Peng","orcid":"https://orcid.org/0000-0002-5783-321X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiaojiang Peng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101827257","display_name":"Wenqi Shao","orcid":"https://orcid.org/0000-0003-3781-4086"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wenqi Shao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023274785","display_name":"Hongxun Yao","orcid":"https://orcid.org/0000-0003-3298-2574"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hongxun Yao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5036606244","display_name":"Kaipeng Zhang","orcid":"https://orcid.org/0000-0001-6105-6532"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kaipeng Zhang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.9596,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.81426232,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"21586","last_page":"21606"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11357","display_name":"Risk and Safety Analysis","score":0.9901000261306763,"subfield":{"id":"https://openalex.org/subfields/1804","display_name":"Statistics, Probability and Uncertainty"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11357","display_name":"Risk and Safety Analysis","score":0.9901000261306763,"subfield":{"id":"https://openalex.org/subfields/1804","display_name":"Statistics, Probability and Uncertainty"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10876","display_name":"Fault Detection and Control Systems","score":0.972599983215332,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13295","display_name":"Safety Systems Engineering in Autonomy","score":0.9546999931335449,"subfield":{"id":"https://openalex.org/subfields/2213","display_name":"Safety, Risk, Reliability and Quality"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.7369171380996704},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7288233041763306},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.6782670021057129},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.6093314290046692},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5403653979301453},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3540833592414856},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.09765163064002991},{"id":"https://openalex.org/keywords/geology","display_name":"Geology","score":0.06373342871665955}],"concepts":[{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.7369171380996704},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7288233041763306},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.6782670021057129},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.6093314290046692},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5403653979301453},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3540833592414856},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.09765163064002991},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.06373342871665955},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/2025.findings-acl.1112","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-acl.1112","pdf_url":"https://aclanthology.org/2025.findings-acl.1112.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: ACL 2025","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/2025.findings-acl.1112","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-acl.1112","pdf_url":"https://aclanthology.org/2025.findings-acl.1112.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: ACL 2025","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3700909421","display_name":null,"funder_award_id":"62176165","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5887223637","display_name":null,"funder_award_id":"62441202","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8888893758","display_name":null,"funder_award_id":"62476069","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4412887861.pdf","grobid_xml":"https://content.openalex.org/works/W4412887861.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2961085424","https://openalex.org/W4306674287","https://openalex.org/W4387369504","https://openalex.org/W4394896187","https://openalex.org/W3170094116","https://openalex.org/W4386462264","https://openalex.org/W3107602296","https://openalex.org/W4364306694","https://openalex.org/W4312192474","https://openalex.org/W4283697347"],"abstract_inverted_index":{"Reasoning":[0,137],"is":[1,20],"an":[2],"essential":[3],"capacity":[4],"for":[5,22,144],"large":[6],"language":[7],"models":[8,28],"(LLMs)":[9],"to":[10,32,86],"address":[11,74],"complex":[12],"tasks,":[13],"where":[14],"the":[15,89,108,116,132,142,160],"identification":[16],"of":[17,59,91,105,118,162],"process":[18],"errors":[19],"vital":[21],"improving":[23,54],"this":[24,75],"ability.Recently,":[25],"process-level":[26],"reward":[27],"(PRMs)":[29],"were":[30],"proposed":[31],"provide":[33],"step-wise":[34],"rewards":[35],"that":[36],"facilitate":[37],"reinforcement":[38],"learning":[39],"and":[40,45,63,130,135,156],"data":[41],"production":[42],"during":[43,51,148],"training":[44],"guide":[46],"LLMs":[47],"toward":[48],"correct":[49],"steps":[50,147],"inference,":[52],"thereby":[53],"reasoning":[55,72,109,121,146],"accuracy.However,":[56],"existing":[57],"benchmarks":[58],"PRMs":[60,92,106],"are":[61],"text-based":[62],"focus":[64],"on":[65],"error":[66],"detection,":[67],"neglecting":[68],"other":[69],"scenarios":[70],"like":[71],"search.To":[73],"gap,":[76],"we":[77],"introduce":[78],"MPBench,":[79],"a":[80,102],"comprehensive,":[81],"multi-task,":[82],"multimodal":[83,163],"benchmark":[84],"designed":[85],"systematically":[87],"assess":[88],"effectiveness":[90],"in":[93,107],"diverse":[94],"scenarios.MPBench":[95],"employs":[96],"three":[97],"evaluation":[98],"paradigms,":[99,151],"each":[100,119],"targeting":[101],"specific":[103],"role":[104],"process:":[110],"(1)":[111],"Step":[112],"Correctness,":[113],"which":[114,126,140],"assesses":[115],"correctness":[117],"intermediate":[120],"step;":[122],"(2)":[123],"Answer":[124],"Aggregation,":[125],"aggregates":[127],"multiple":[128],"solutions":[129],"selects":[131],"best":[133],"one;":[134],"(3)":[136],"Process":[138],"Search,":[139],"guides":[141],"search":[143],"optimal":[145],"inference.Through":[149],"these":[150],"MPBench":[152],"makes":[153],"comprehensive":[154],"evaluations":[155],"provides":[157],"insights":[158],"into":[159],"development":[161],"PRMs.":[164]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
