{"id":"https://openalex.org/W7138935120","doi":"https://doi.org/10.1145/3788149.3788243","title":"Considerations on the Trustworthiness Evaluation of LLMs for Math Education: Reliability, Robustness, and Explainability","display_name":"Considerations on the Trustworthiness Evaluation of LLMs for Math Education: Reliability, Robustness, and Explainability","publication_year":2025,"publication_date":"2025-12-12","ids":{"openalex":"https://openalex.org/W7138935120","doi":"https://doi.org/10.1145/3788149.3788243"},"language":null,"primary_location":{"id":"doi:10.1145/3788149.3788243","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3788149.3788243","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 9th International Conference on Computer Science and Artificial Intelligence","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3788149.3788243","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5019073001","display_name":"Xuehuan Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I4210114245","display_name":"Anhui Business College","ror":"https://ror.org/02d0cgn19","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210114245"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xuehuan Chen","raw_affiliation_strings":["Concord College of Sino-Canada (CCSC), Hefei, Anhui, China"],"raw_orcid":"https://orcid.org/0009-0009-0402-0897","affiliations":[{"raw_affiliation_string":"Concord College of Sino-Canada (CCSC), Hefei, Anhui, China","institution_ids":["https://openalex.org/I4210114245"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103942071","display_name":"Yufei Mei","orcid":"https://orcid.org/0000-0003-3973-3132"},"institutions":[{"id":"https://openalex.org/I4210114245","display_name":"Anhui Business College","ror":"https://ror.org/02d0cgn19","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210114245"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yufei Mei","raw_affiliation_strings":["Concord College of Sino-Canada (CCSC), Hefei, Anhui, China"],"raw_orcid":"https://orcid.org/0009-0003-8593-3669","affiliations":[{"raw_affiliation_string":"Concord College of Sino-Canada (CCSC), Hefei, Anhui, China","institution_ids":["https://openalex.org/I4210114245"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129910704","display_name":"Yuhan Li","orcid":null},"institutions":[{"id":"https://openalex.org/I4210114245","display_name":"Anhui Business College","ror":"https://ror.org/02d0cgn19","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210114245"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuhan Li","raw_affiliation_strings":["Concord College of Sino-Canada (CCSC), Hefei, Anhui, China"],"raw_orcid":"https://orcid.org/0009-0000-5946-1475","affiliations":[{"raw_affiliation_string":"Concord College of Sino-Canada (CCSC), Hefei, Anhui, China","institution_ids":["https://openalex.org/I4210114245"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Yan Yang","orcid":"https://orcid.org/0009-0000-2305-0770"},"institutions":[{"id":"https://openalex.org/I4210114245","display_name":"Anhui Business College","ror":"https://ror.org/02d0cgn19","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210114245"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yan Yang","raw_affiliation_strings":["Concord College of Sino-Canada (CCSC), Hefei, Anhui, China"],"raw_orcid":"https://orcid.org/0009-0000-2305-0770","affiliations":[{"raw_affiliation_string":"Concord College of Sino-Canada (CCSC), Hefei, Anhui, China","institution_ids":["https://openalex.org/I4210114245"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129877732","display_name":"Qien Li","orcid":null},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qien Li","raw_affiliation_strings":["Peking University, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0009-7451-8973","affiliations":[{"raw_affiliation_string":"Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5050092469","display_name":"Rui Mei","orcid":"https://orcid.org/0000-0001-7480-2004"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Rui Mei","raw_affiliation_strings":["Peking University, Beijing, China and iFLYTEK Security Laboratory, Hefei, Anhui, China"],"raw_orcid":"https://orcid.org/0000-0001-7480-2004","affiliations":[{"raw_affiliation_string":"Peking University, Beijing, China and iFLYTEK Security Laboratory, Hefei, Anhui, China","institution_ids":["https://openalex.org/I20231570"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5019073001"],"corresponding_institution_ids":["https://openalex.org/I4210114245"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.91237917,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"561","last_page":"568"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12884","display_name":"Educational Assessment and Pedagogy","score":0.1168999969959259,"subfield":{"id":"https://openalex.org/subfields/3304","display_name":"Education"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T12884","display_name":"Educational Assessment and Pedagogy","score":0.1168999969959259,"subfield":{"id":"https://openalex.org/subfields/3304","display_name":"Education"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11902","display_name":"Intelligent Tutoring Systems and Adaptive Learning","score":0.11169999837875366,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.04910000041127205,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/trustworthiness","display_name":"Trustworthiness","score":0.590399980545044},{"id":"https://openalex.org/keywords/athletes","display_name":"Athletes","score":0.27070000767707825},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.26669999957084656},{"id":"https://openalex.org/keywords/affect","display_name":"Affect (linguistics)","score":0.2535000145435333}],"concepts":[{"id":"https://openalex.org/C153701036","wikidata":"https://www.wikidata.org/wiki/Q659974","display_name":"Trustworthiness","level":2,"score":0.590399980545044},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.42179998755455017},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.3707999885082245},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.3506999909877777},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.2858999967575073},{"id":"https://openalex.org/C2781054738","wikidata":"https://www.wikidata.org/wiki/Q4813730","display_name":"Athletes","level":2,"score":0.27070000767707825},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.26669999957084656},{"id":"https://openalex.org/C2776035688","wikidata":"https://www.wikidata.org/wiki/Q1606558","display_name":"Affect (linguistics)","level":2,"score":0.2535000145435333},{"id":"https://openalex.org/C75630572","wikidata":"https://www.wikidata.org/wiki/Q538904","display_name":"Applied psychology","level":1,"score":0.250900000333786},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.2460000067949295}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3788149.3788243","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3788149.3788243","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 9th International Conference on Computer Science and Artificial Intelligence","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3788149.3788243","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3788149.3788243","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 9th International Conference on Computer Science and Artificial Intelligence","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.6417401432991028,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":6,"referenced_works":["https://openalex.org/W4388704962","https://openalex.org/W4407601669","https://openalex.org/W4409362112","https://openalex.org/W4412886755","https://openalex.org/W4413070980","https://openalex.org/W7127088337"],"related_works":[],"abstract_inverted_index":{"Large":[0],"language":[1],"models":[2],"(LLMs)":[3],"are":[4,96],"increasingly":[5],"applied":[6],"to":[7,19,62,127],"math":[8,46],"education,":[9],"yet":[10],"their":[11],"opaque":[12],"architectures,":[13],"unstable":[14],"reasoning":[15,54,90,106,125],"behaviors,":[16],"and":[17,37,52,65,74,87,108,116],"sensitivity":[18],"linguistic":[20],"variations":[21],"raise":[22],"critical":[23],"trustworthiness":[24],"concerns.":[25],"This":[26],"paper":[27],"presents":[28],"a":[29,44],"unified":[30],"evaluation":[31],"framework":[32,57],"that":[33],"operationalizes":[34],"reliability,":[35],"robustness,":[36],"explainability":[38],"into":[39],"automated":[40],"assessment":[41],"tasks.":[42],"Using":[43],"curated":[45],"problem":[47],"set":[48],"with":[49,120],"perturbation":[50],"variants":[51],"step-level":[53],"annotations,":[55],"the":[56,111],"measures":[58],"factual":[59],"correctness,":[60],"resistance":[61],"surface-level":[63],"changes,":[64],"consistency":[66],"of":[67],"intermediate":[68],"reasoning.":[69],"Experiments":[70],"across":[71],"both":[72],"commercial":[73],"open-source":[75],"LLMs":[76],"reveal":[77],"substantial":[78],"reliability":[79],"gaps,":[80],"notable":[81],"accuracy":[82],"degradation":[83],"under":[84],"minor":[85],"rephrasings,":[86],"pervasive":[88],"early-stage":[89],"inconsistencies":[91],"even":[92],"when":[93],"final":[94],"answers":[95],"correct.":[97],"These":[98],"findings":[99],"highlight":[100],"fundamental":[101],"limitations":[102],"in":[103],"current":[104],"models\u2019":[105],"faithfulness":[107],"generalization,":[109],"underscoring":[110],"need":[112],"for":[113],"improved":[114],"alignment":[115],"robustness":[117],"strategies":[118],"together":[119],"more":[121],"faithful,":[122],"pedagogically":[123],"sound":[124],"processes":[126],"support":[128],"trustworthy":[129],"educational":[130],"deployment.":[131]},"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2026-03-20T00:00:00"}
