{"id":"https://openalex.org/W7083290199","doi":"https://doi.org/10.18420/abp2025_10","title":"Small but Competitive \u2013 Evaluating DeepSeek-R1 Among Diverse Open LLMs for Formative Programming Feedback","display_name":"Small but Competitive \u2013 Evaluating DeepSeek-R1 Among Diverse Open LLMs for Formative Programming Feedback","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W7083290199","doi":"https://doi.org/10.18420/abp2025_10"},"language":"en","primary_location":{"id":"doi:10.18420/abp2025_10","is_oa":true,"landing_page_url":"https://doi.org/10.18420/abp2025_10","pdf_url":null,"source":{"id":"https://openalex.org/S7407052918","display_name":"Gesellschaft f\u00fcr Informatik (GI)","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article-journal"},"type":"article","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.18420/abp2025_10","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Azaiz, Imen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Azaiz, Imen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Konrad, Felippo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Konrad, Felippo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Strickroth, Sven","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Strickroth, Sven","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.65684252,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T10978","display_name":"Prenatal Screening and Diagnostics","score":0.1914999932050705,"subfield":{"id":"https://openalex.org/subfields/2735","display_name":"Pediatrics, Perinatology and Child Health"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T10978","display_name":"Prenatal Screening and Diagnostics","score":0.1914999932050705,"subfield":{"id":"https://openalex.org/subfields/2735","display_name":"Pediatrics, Perinatology and Child Health"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T12552","display_name":"Fetal and Pediatric Neurological Disorders","score":0.09830000251531601,"subfield":{"id":"https://openalex.org/subfields/2735","display_name":"Pediatrics, Perinatology and Child Health"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T12587","display_name":"Gestational Trophoblastic Disease Studies","score":0.09480000287294388,"subfield":{"id":"https://openalex.org/subfields/2739","display_name":"Public Health, Environmental and Occupational Health"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/formative-assessment","display_name":"Formative assessment","score":0.8345999717712402},{"id":"https://openalex.org/keywords/correctness","display_name":"Correctness","score":0.7229999899864197},{"id":"https://openalex.org/keywords/java","display_name":"Java","score":0.5831000208854675},{"id":"https://openalex.org/keywords/personalization","display_name":"Personalization","score":0.5663999915122986},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.5109000205993652},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.4081000089645386},{"id":"https://openalex.org/keywords/summative-assessment","display_name":"Summative assessment","score":0.39590001106262207},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.3531000018119812}],"concepts":[{"id":"https://openalex.org/C42525527","wikidata":"https://www.wikidata.org/wiki/Q1209955","display_name":"Formative assessment","level":2,"score":0.8345999717712402},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7773000001907349},{"id":"https://openalex.org/C55439883","wikidata":"https://www.wikidata.org/wiki/Q360812","display_name":"Correctness","level":2,"score":0.7229999899864197},{"id":"https://openalex.org/C548217200","wikidata":"https://www.wikidata.org/wiki/Q251","display_name":"Java","level":2,"score":0.5831000208854675},{"id":"https://openalex.org/C183003079","wikidata":"https://www.wikidata.org/wiki/Q1000371","display_name":"Personalization","level":2,"score":0.5663999915122986},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.5109000205993652},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.5052000284194946},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.4562000036239624},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4424999952316284},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.4081000089645386},{"id":"https://openalex.org/C152747807","wikidata":"https://www.wikidata.org/wiki/Q1854913","display_name":"Summative assessment","level":3,"score":0.39590001106262207},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.3531000018119812},{"id":"https://openalex.org/C184356942","wikidata":"https://www.wikidata.org/wiki/Q830382","display_name":"Best practice","level":2,"score":0.3402999937534332},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.3366999924182892},{"id":"https://openalex.org/C170130773","wikidata":"https://www.wikidata.org/wiki/Q216378","display_name":"Usability","level":2,"score":0.3325999975204468},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.32510000467300415},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.32429999113082886},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3100999891757965},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.30160000920295715},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.30140000581741333},{"id":"https://openalex.org/C45235069","wikidata":"https://www.wikidata.org/wiki/Q278425","display_name":"Table (database)","level":2,"score":0.2953999936580658},{"id":"https://openalex.org/C2986567400","wikidata":"https://www.wikidata.org/wiki/Q15777","display_name":"C programming language","level":3,"score":0.28130000829696655},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.28130000829696655},{"id":"https://openalex.org/C2908586218","wikidata":"https://www.wikidata.org/wiki/Q251","display_name":"Java Programming Language","level":3,"score":0.27730000019073486},{"id":"https://openalex.org/C539667460","wikidata":"https://www.wikidata.org/wiki/Q2414942","display_name":"Management science","level":1,"score":0.2637999951839447},{"id":"https://openalex.org/C3018397939","wikidata":"https://www.wikidata.org/wiki/Q3644502","display_name":"Open source","level":3,"score":0.2556000053882599},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2549999952316284},{"id":"https://openalex.org/C3018587665","wikidata":"https://www.wikidata.org/wiki/Q7268696","display_name":"Qualitative analysis","level":3,"score":0.25459998846054077},{"id":"https://openalex.org/C33857546","wikidata":"https://www.wikidata.org/wiki/Q80006","display_name":"Computer programming","level":2,"score":0.2533999979496002}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18420/abp2025_10","is_oa":true,"landing_page_url":"https://doi.org/10.18420/abp2025_10","pdf_url":null,"source":{"id":"https://openalex.org/S7407052918","display_name":"Gesellschaft f\u00fcr Informatik (GI)","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article-journal"}],"best_oa_location":{"id":"doi:10.18420/abp2025_10","is_oa":true,"landing_page_url":"https://doi.org/10.18420/abp2025_10","pdf_url":null,"source":{"id":"https://openalex.org/S7407052918","display_name":"Gesellschaft f\u00fcr Informatik (GI)","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article-journal"},"sustainable_development_goals":[{"score":0.7379304766654968,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Automated":[0],"formative":[1],"feedback":[2,27,134],"powered":[3],"by":[4],"large":[5,49],"language":[6,50,78],"models":[7,38,51,60,118],"(LLMs)":[8],"holds":[9],"promise":[10],"for":[11,35,141,153],"supporting":[12],"novice":[13],"programming":[14,69],"learners.":[15],"However,":[16],"little":[17],"is":[18],"known":[19],"about":[20],"the":[21,93,112,133,150],"performance":[22,95],"trade-offs":[23],"between":[24],"model":[25],"size,":[26],"quality":[28],"and":[29,48,57,76,88,101,125,147],"correctness":[30],"in":[31,85,107,122],"automated":[32],"scenarios":[33],"especially":[34],"recent":[36],"reasoning":[37,114],"such":[39],"as":[40],"DeepSeek-R1.":[41],"This":[42],"study":[43,109],"evaluates":[44],"twelve":[45],"open":[46],"small":[47],"\u2013":[52,61],"including":[53],"reasoning,":[54],"code":[55],"specialized":[56],"general":[58],"purpose":[59],"on":[62],"two":[63,155],"Java":[64],"assignments":[65],"from":[66],"an":[67,97,102],"introductory":[68],"course.":[70],"Using":[71],"prompt":[72],"engineering":[73],"with":[74,96,149],"structured":[75],"natural":[77],"outputs,":[79],"this":[80],"work":[81],"assesses":[82],"their":[83],"accuracy":[84,98,124],"classifying":[86],"syntactic":[87],"functional":[89],"correctness.":[90],"DeepSeek-R1:14B":[91],"achieved":[92],"best":[94],"of":[99,105,144],".81":[100],"F1":[103],"score":[104],".68":[106],"our":[108],"despite":[110],"being":[111],"smallest":[113],"model,":[115],"outperforming":[116],"larger":[117],"(e.":[119],"g.,":[120],"CodeLama:70B)":[121],"both":[123],"stability.":[126],"A":[127],"qualitative":[128],"analysis":[129],"shows":[130],"that,":[131],"while":[132],"contains":[135],"informative":[136],"corrections,":[137],"it":[138],"lacks":[139],"personalization":[140],"a":[142],"quarter":[143],"all":[145],"submissions":[146],"complies":[148],"assignment":[151],"specification":[152],"only":[154],"thirds":[156],"overall.":[157]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
