{"id":"https://openalex.org/W7133508040","doi":"https://doi.org/10.48550/arxiv.2603.02663","title":"Evaluating Cross-Modal Reasoning Ability and Problem Characteristics with Multimodal Item Response Theory","display_name":"Evaluating Cross-Modal Reasoning Ability and Problem Characteristics with Multimodal Item Response Theory","publication_year":2026,"publication_date":"2026-03-03","ids":{"openalex":"https://openalex.org/W7133508040","doi":"https://doi.org/10.48550/arxiv.2603.02663"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.02663","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.02663","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.02663","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5128079883","display_name":"Shunki Uebayashi","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Uebayashi, Shunki","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064089679","display_name":"Kento Masui","orcid":"https://orcid.org/0000-0002-4174-4378"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Masui, Kento","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024780466","display_name":"Kyohei Atarashi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Atarashi, Kyohei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128069111","display_name":"Han Bao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bao, Han","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128116650","display_name":"Hisashi Kashima","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kashima, Hisashi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128095147","display_name":"Naoto Inoue","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Inoue, Naoto","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128072439","display_name":"Mayu Otani","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Otani, Mayu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5088800219","display_name":"Koh Takeuchi","orcid":"https://orcid.org/0000-0002-3245-888X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Takeuchi, Koh","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5128079883"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.6129999756813049,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.6129999756813049,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.08290000259876251,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11902","display_name":"Intelligent Tutoring Systems and Adaptive Learning","score":0.07980000227689743,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/ranking","display_name":"Ranking (information retrieval)","score":0.5856999754905701},{"id":"https://openalex.org/keywords/measure","display_name":"Measure (data warehouse)","score":0.574400007724762},{"id":"https://openalex.org/keywords/fidelity","display_name":"Fidelity","score":0.557699978351593},{"id":"https://openalex.org/keywords/item-response-theory","display_name":"Item response theory","score":0.446399986743927},{"id":"https://openalex.org/keywords/visual-reasoning","display_name":"Visual reasoning","score":0.3434999883174896},{"id":"https://openalex.org/keywords/computational-model","display_name":"Computational model","score":0.31049999594688416}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.744700014591217},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5896999835968018},{"id":"https://openalex.org/C189430467","wikidata":"https://www.wikidata.org/wiki/Q7293293","display_name":"Ranking (information retrieval)","level":2,"score":0.5856999754905701},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.574400007724762},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.557699978351593},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4945000112056732},{"id":"https://openalex.org/C19875794","wikidata":"https://www.wikidata.org/wiki/Q1207340","display_name":"Item response theory","level":3,"score":0.446399986743927},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.3434999883174896},{"id":"https://openalex.org/C66024118","wikidata":"https://www.wikidata.org/wiki/Q1122506","display_name":"Computational model","level":2,"score":0.31049999594688416},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.2870999872684479},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.2782999873161316},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.267300009727478}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.02663","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.02663","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.02663","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.02663","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Multimodal":[0],"Large":[1],"Language":[2],"Models":[3],"(MLLMs)":[4],"have":[5],"recently":[6],"emerged":[7],"as":[8],"general":[9],"architectures":[10],"capable":[11],"of":[12,76,111,147],"reasoning":[13,170],"over":[14,138],"diverse":[15],"modalities.":[16],"Benchmarks":[17],"for":[18,24,167],"MLLMs":[19,112],"should":[20],"measure":[21],"their":[22],"ability":[23,97,110],"cross-modal":[25,105,109,116,136,169],"integration.":[26],"However,":[27],"current":[28],"benchmarks":[29],"are":[30,149],"filled":[31],"with":[32],"shortcut":[33],"questions,":[34,153],"which":[35],"can":[36,54],"be":[37],"solved":[38],"using":[39],"only":[40],"a":[41,80,164],"single":[42],"modality,":[43],"thereby":[44,154],"yielding":[45],"unreliable":[46],"rankings.":[47],"For":[48],"example,":[49],"in":[50],"vision-language":[51],"cases,":[52],"we":[53],"find":[55],"the":[56,61,64,71],"correct":[57],"answer":[58],"without":[59],"either":[60],"image":[62],"or":[63],"text.":[65],"These":[66],"low-quality":[67,152],"questions":[68,137],"unnecessarily":[69],"increase":[70],"size":[72],"and":[73,82,98,104,113,140,171],"computational":[74],"requirements":[75],"benchmarks.":[77,174],"We":[78],"introduce":[79],"multi-modal":[81],"multidimensional":[83],"item":[84,99],"response":[85],"theory":[86],"framework":[87],"(M3IRT)":[88],"that":[89,122],"extends":[90],"classical":[91],"IRT":[92],"by":[93],"decomposing":[94],"both":[95],"model":[96],"difficulty":[100],"into":[101],"image-only,":[102],"text-only,":[103],"components.":[106],"M3IRT":[107,133,161],"estimates":[108],"each":[114],"question's":[115],"difficulty,":[117],"enabling":[118],"compact,":[119],"high-quality":[120],"subsets":[121],"better":[123],"reflect":[124],"multimodal":[125,173],"reasoning.":[126],"Across":[127],"24":[128],"VLMs":[129],"on":[130],"three":[131],"benchmarks,":[132],"prioritizes":[134],"genuinely":[135],"shortcuts":[139],"preserves":[141],"ranking":[142],"fidelity":[143],"even":[144],"when":[145],"50%":[146],"items":[148],"artificially":[150],"generated":[151],"reducing":[155],"evaluation":[156],"cost":[157],"while":[158],"improving":[159],"reliability.":[160],"thus":[162],"offers":[163],"practical":[165],"tool":[166],"assessing":[168],"refining":[172]},"counts_by_year":[],"updated_date":"2026-03-05T07:36:02.291473","created_date":"2026-03-05T00:00:00"}
