{"id":"https://openalex.org/W4412888795","doi":"https://doi.org/10.18653/v1/2025.findings-acl.209","title":"MALAMUTE: A Multilingual, Highly-granular, Template-free, Education-based Probing Dataset","display_name":"MALAMUTE: A Multilingual, Highly-granular, Template-free, Education-based Probing Dataset","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4412888795","doi":"https://doi.org/10.18653/v1/2025.findings-acl.209"},"language":"en","primary_location":{"id":"doi:10.18653/v1/2025.findings-acl.209","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-acl.209","pdf_url":"https://aclanthology.org/2025.findings-acl.209.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: ACL 2025","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2025.findings-acl.209.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5060639758","display_name":"Sagi Shaier","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sagi Shaier","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113179889","display_name":"George Arthur Baker","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"George Arthur Baker","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050945965","display_name":"Chiranthan Sridhar","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chiranthan Sridhar","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041860080","display_name":"Lawrence Hunter","orcid":"https://orcid.org/0000-0003-1455-3370"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lawrence Hunter","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5093081501","display_name":"Katharina von der Wense","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Katharina Von Der Wense","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.14775543,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"4051","last_page":"4069"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12357","display_name":"Digital Media Forensic Detection","score":0.8587999939918518,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12357","display_name":"Digital Media Forensic Detection","score":0.8587999939918518,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.8037999868392944,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.7833999991416931,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6357853412628174},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3804776966571808},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3200720250606537}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6357853412628174},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3804776966571808},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3200720250606537}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/2025.findings-acl.209","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-acl.209","pdf_url":"https://aclanthology.org/2025.findings-acl.209.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: ACL 2025","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/2025.findings-acl.209","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-acl.209","pdf_url":"https://aclanthology.org/2025.findings-acl.209.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: ACL 2025","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.8399999737739563,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4412888795.pdf","grobid_xml":"https://content.openalex.org/works/W4412888795.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W3204019825"],"abstract_inverted_index":{"Language":[0],"models":[1],"(LMs)":[2],"have":[3,38,174],"excelled":[4],"in":[5,24,67,177,189],"various":[6],"broad":[7,57],"domains.However,":[8],"to":[9,34,119],"ensure":[10],"their":[11,186],"safe":[12,187],"and":[13,70,87,105,127,135,142,147,163,191,198],"effective":[14],"integration":[15],"into":[16,125],"real-world":[17],"educational":[18,47,140],"settings,":[19],"they":[20,173],"must":[21],"demonstrate":[22],"proficiency":[23],"specific,":[25],"granular":[26,89],"areas":[27],"of":[28,144,161],"knowledge.Existing":[29],"cloze-style":[30,111],"benchmarks,":[31],"commonly":[32],"used":[33],"evaluate":[35],"LMs'":[36,157],"knowledge,":[37],"three":[39,101],"major":[40],"limitations.They:":[41],"1)":[42],"do":[43,60],"not":[44,61],"cover":[45],"the":[46,64,108,193],"domain;":[48],"2)":[49],"typically":[50],"focus":[51],"on":[52,74,166,182],"low-complexity,":[53],"generic":[54],"knowledge":[55,66,178],"or":[56],"domains,":[58,115],"which":[59],"adequately":[62],"assess":[63],"models'":[65],"specific":[68,183],"subjects;":[69],"3)":[71],"often":[72],"rely":[73],"templates":[75],"that":[76,169],"can":[77,200],"bias":[78],"model":[79],"predictions.Here,":[80],"we":[81],"introduce":[82],"MALAMUTE,":[83],"a":[84],"multilingual,":[85],"template-free,":[86],"highly":[88],"probing":[90],"dataset":[91],"comprising":[92],"expert-written,":[93],"peer-reviewed":[94],"probes":[95],"from":[96],"71":[97],"university-level":[98],"textbooks":[99],"across":[100],"languages":[102],"(English,":[103],"Spanish,":[104],"Polish).MALAMUTE":[106],"is":[107],"first":[109],"education-based":[110],"dataset.It":[112],"covers":[113],"eight":[114],"each":[116],"with":[117],"up":[118],"14":[120],"subdomains,":[121],"further":[122,196],"broken":[123],"down":[124],"concepts":[126,134],"concept-based":[128],"prompts,":[129],"totaling":[130],"33,361":[131],"university":[132],"curriculum":[133],"116,887":[136],"prompts.MALAMUTE's":[137],"fine":[138],"granularity,":[139],"focus,":[141],"inclusion":[143],"both":[145],"sentencelevel":[146],"paragraph-level":[148],"prompts":[149],"make":[150],"it":[151],"an":[152],"ideal":[153],"tool":[154],"for":[155,195],"evaluating":[156],"course-related":[158],"knowledge.Our":[159],"evaluation":[160],"masked":[162],"causal":[164],"LMs":[165],"MALAMUTE":[167],"shows":[168],"despite":[170],"overall":[171],"proficiency,":[172],"significant":[175],"gaps":[176],"when":[179],"examined":[180],"closely":[181],"subjects,":[184],"hindering":[185],"use":[188],"classrooms":[190],"underscoring":[192],"need":[194],"development.Code":[197],"data":[199],"be":[201],"found":[202],"at":[203],"https://github.com/Shaier/MALAMUTE.":[204]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
