{"id":"https://openalex.org/W4415343829","doi":"https://doi.org/10.48550/arxiv.2507.03162","title":"MateInfoUB: A Real-World Benchmark for Testing LLMs in Competitive, Multilingual, and Multimodal Educational Tasks","display_name":"MateInfoUB: A Real-World Benchmark for Testing LLMs in Competitive, Multilingual, and Multimodal Educational Tasks","publication_year":2025,"publication_date":"2025-07-03","ids":{"openalex":"https://openalex.org/W4415343829","doi":"https://doi.org/10.48550/arxiv.2507.03162"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2507.03162","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2507.03162","pdf_url":"https://arxiv.org/pdf/2507.03162","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2507.03162","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5120060827","display_name":"Dumitran Adrian Marius","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Marius, Dumitran Adrian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120060828","display_name":"Theodor-Pierre Moroianu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Moroianu, Theodor-Pierre","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5120060829","display_name":"Buca Mihnea-Vicentiu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mihnea-Vicentiu, Buca","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5120060827"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9908999800682068,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9908999800682068,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6252999901771545},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.6014999747276306},{"id":"https://openalex.org/keywords/romanian","display_name":"Romanian","score":0.539900004863739},{"id":"https://openalex.org/keywords/competition","display_name":"Competition (biology)","score":0.30979999899864197},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.2994000017642975},{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.2874000072479248}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7059000134468079},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6252999901771545},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.6014999747276306},{"id":"https://openalex.org/C129400051","wikidata":"https://www.wikidata.org/wiki/Q7913","display_name":"Romanian","level":2,"score":0.539900004863739},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.4244999885559082},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.36010000109672546},{"id":"https://openalex.org/C539667460","wikidata":"https://www.wikidata.org/wiki/Q2414942","display_name":"Management science","level":1,"score":0.3176000118255615},{"id":"https://openalex.org/C91306197","wikidata":"https://www.wikidata.org/wiki/Q45767","display_name":"Competition (biology)","level":2,"score":0.30979999899864197},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.2994000017642975},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.2935999929904938},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.2874000072479248},{"id":"https://openalex.org/C2778883600","wikidata":"https://www.wikidata.org/wiki/Q2390977","display_name":"Language proficiency","level":2,"score":0.2865000069141388},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.2786000072956085},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.26980000734329224},{"id":"https://openalex.org/C184356942","wikidata":"https://www.wikidata.org/wiki/Q830382","display_name":"Best practice","level":2,"score":0.2529999911785126}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2507.03162","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2507.03162","pdf_url":"https://arxiv.org/pdf/2507.03162","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2507.03162","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2507.03162","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2507.03162","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2507.03162","pdf_url":"https://arxiv.org/pdf/2507.03162","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0,96],"rapid":[1],"advancement":[2],"of":[3,50,62,74,95,116,122,151,156],"Large":[4],"Language":[5],"Models":[6],"(LLMs)":[7],"has":[8],"transformed":[9],"various":[10],"domains,":[11],"particularly":[12],"computer":[13,57],"science":[14,58],"(CS)":[15],"education.":[16],"These":[17,159],"models":[18],"exhibit":[19],"remarkable":[20],"capabilities":[21],"in":[22,34,133,153,180,202],"code-related":[23],"tasks":[24],"and":[25,32,47,114,136,148,167,183,205],"problem-solving,":[26],"raising":[27],"questions":[28,52],"about":[29],"their":[30,103,131],"potential":[31],"limitations":[33,115],"advanced":[35],"CS":[36,134],"contexts.":[37],"This":[38],"study":[39],"presents":[40],"a":[41,55],"novel":[42],"bilingual":[43],"(English-Romanian)":[44],"multimodal":[45],"(text":[46],"image)":[48],"dataset":[49,64,174,201],"multiple-choice":[51],"derived":[53],"from":[54],"high-level":[56],"competition.":[59],"A":[60],"particularity":[61],"our":[63,173],"is":[65,88],"that":[66,72],"the":[67,112,120,149,154,200],"problems":[68],"are":[69,76],"conceived":[70],"such":[71],"some":[73],"them":[75,196],"easier":[77],"solved":[78],"using":[79,199],"reasoning":[80],"on":[81,99,105],"paper,":[82],"while":[83],"for":[84,192],"others":[85],"writing":[86],"code":[87],"more":[89],"efficient.":[90],"We":[91,139],"systematically":[92],"evaluate":[93],"State":[94],"Art":[97],"LLMs":[98],"this":[100],"dataset,":[101],"analyzing":[102],"performance":[104],"theoretical":[106],"programming":[107],"tasks.":[108],"Our":[109],"findings":[110],"reveal":[111],"strengths":[113],"current":[117],"LLMs,":[118],"including":[119],"influence":[121],"language":[123],"choice":[124],"(English":[125],"vs.":[126],"Romanian),":[127],"providing":[128],"insights":[129],"into":[130],"applicability":[132],"education":[135],"competition":[137],"settings.":[138],"also":[140],"address":[141],"critical":[142],"ethical":[143],"considerations":[144],"surrounding":[145],"educational":[146,165,189],"integrity":[147],"fairness":[150],"assessments":[152],"context":[155],"LLM":[157],"usage.":[158],"discussions":[160],"aim":[161],"to":[162,197],"inform":[163],"future":[164],"practices":[166],"policies.":[168],"To":[169],"support":[170],"further":[171],"research,":[172],"will":[175],"be":[176],"made":[177],"publicly":[178],"available":[179],"both":[181],"English":[182],"Romanian.":[184],"Additionally,":[185],"we":[186],"release":[187],"an":[188,203],"application":[190],"tailored":[191],"Romanian":[193],"students,":[194],"enabling":[195],"self-assess":[198],"interactive":[204],"practice-oriented":[206],"environment.":[207]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-20T00:00:00"}
