{"id":"https://openalex.org/W7134819710","doi":"https://doi.org/10.48550/arxiv.2603.07779","title":"Scaling Data Difficulty: Improving Coding Models via Reinforcement Learning on Fresh and Challenging Problems","display_name":"Scaling Data Difficulty: Improving Coding Models via Reinforcement Learning on Fresh and Challenging Problems","publication_year":2026,"publication_date":"2026-03-08","ids":{"openalex":"https://openalex.org/W7134819710","doi":"https://doi.org/10.48550/arxiv.2603.07779"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2603.07779","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5022488585","display_name":"Zongqian Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Li, Zongqian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031963349","display_name":"Tengchao Lv","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lv, Tengchao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128682638","display_name":"Shaohan Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Shaohan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080253330","display_name":"Yixuan Su","orcid":"https://orcid.org/0000-0002-1472-7791"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Su, Yixuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059272550","display_name":"Qi Sun","orcid":"https://orcid.org/0009-0005-0146-0724"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Qinzheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031355252","display_name":"Qiufeng Yin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yin, Qiufeng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128676351","display_name":"Ying Xin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xin, Ying","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016920988","display_name":"Shan Li","orcid":"https://orcid.org/0000-0001-6825-0346"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Scarlett","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128659182","display_name":"Lei Cui","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cui, Lei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128639174","display_name":"Nigel Collier","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Collier, Nigel","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5128667845","display_name":"Furu Wei","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wei, Furu","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":11,"corresponding_author_ids":["https://openalex.org/A5022488585"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.6563000082969666,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.6563000082969666,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.1168999969959259,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.03449999913573265,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/coding","display_name":"Coding (social sciences)","score":0.501800000667572},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.46950000524520874},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.4341999888420105},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.428600013256073},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.40389999747276306},{"id":"https://openalex.org/keywords/face","display_name":"Face (sociological concept)","score":0.4020000100135803},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.375},{"id":"https://openalex.org/keywords/data-modeling","display_name":"Data modeling","score":0.36880001425743103},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.319599986076355}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.838100016117096},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5902000069618225},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5491999983787537},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.501800000667572},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.46950000524520874},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.4341999888420105},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.428600013256073},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.41659998893737793},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.40389999747276306},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.4020000100135803},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.375},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.36880001425743103},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.319599986076355},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.3163999915122986},{"id":"https://openalex.org/C24756922","wikidata":"https://www.wikidata.org/wiki/Q1757694","display_name":"Data quality","level":3,"score":0.31529998779296875},{"id":"https://openalex.org/C133462117","wikidata":"https://www.wikidata.org/wiki/Q4929239","display_name":"Data collection","level":2,"score":0.3138999938964844},{"id":"https://openalex.org/C199519371","wikidata":"https://www.wikidata.org/wiki/Q942695","display_name":"Source lines of code","level":3,"score":0.2964000105857849},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.28790000081062317},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.287200003862381},{"id":"https://openalex.org/C31510193","wikidata":"https://www.wikidata.org/wiki/Q1192553","display_name":"Facial recognition system","level":3,"score":0.2858999967575073},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.2761000096797943},{"id":"https://openalex.org/C45804977","wikidata":"https://www.wikidata.org/wiki/Q7239673","display_name":"Predictive modelling","level":2,"score":0.27480000257492065},{"id":"https://openalex.org/C2778915421","wikidata":"https://www.wikidata.org/wiki/Q3643177","display_name":"Performance improvement","level":2,"score":0.2671999931335449},{"id":"https://openalex.org/C110332635","wikidata":"https://www.wikidata.org/wiki/Q629498","display_name":"Genetic programming","level":2,"score":0.2653999924659729},{"id":"https://openalex.org/C138827492","wikidata":"https://www.wikidata.org/wiki/Q6661985","display_name":"Data processing","level":2,"score":0.2565000057220459},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.2549999952316284}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2603.07779","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2603.07779","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.07779","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2603.07779","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Training":[0],"next-generation":[1],"code":[2,177],"generation":[3],"models":[4],"requires":[5],"high-quality":[6],"datasets,":[7],"yet":[8],"existing":[9],"datasets":[10,112],"face":[11],"difficulty":[12,29,56],"imbalance,":[13],"format":[14],"inconsistency,":[15],"and":[16,28,42,89,122,135],"data":[17,26,162],"quality":[18],"problems.":[19],"We":[20,31],"address":[21],"these":[22],"challenges":[23],"through":[24],"systematic":[25],"processing":[27],"scaling.":[30],"introduce":[32],"a":[33],"four-stage":[34],"Data":[35],"Processing":[36],"Framework":[37],"encompassing":[38],"collection,":[39],"processing,":[40],"filtering,":[41],"verification,":[43],"incorporating":[44],"Automatic":[45],"Difficulty":[46],"Filtering":[47],"via":[48],"an":[49],"LLM-based":[50],"predict-calibrate-select":[51],"framework":[52],"that":[53,97,160],"leverages":[54],"multi-dimensional":[55],"metrics":[57],"across":[58,138],"five":[59],"weighted":[60],"dimensions":[61],"to":[62,109,144],"retain":[63],"challenging":[64,168],"problems":[65,83,137],"while":[66],"removing":[67],"simplistic":[68],"ones.":[69],"The":[70,127],"resulting":[71],"MicroCoder":[72,98,128],"dataset":[73,129,174],"comprises":[74],"tens":[75],"of":[76,78,113],"thousands":[77],"curated":[79],"real":[80],"competitive":[81],"programming":[82],"from":[84],"diverse":[85],"platforms,":[86],"emphasizing":[87],"recency":[88],"difficulty.":[90],"Evaluations":[91],"on":[92,133,167],"strictly":[93],"unseen":[94],"LiveCodeBench":[95],"demonstrate":[96],"achieves":[99],"3x":[100],"larger":[101],"performance":[102,150,166],"gains":[103,147],"within":[104],"300":[105],"training":[106,125],"steps":[107],"compared":[108],"widely-used":[110],"baseline":[111],"comparable":[114],"size,":[115],"with":[116],"consistent":[117],"advantages":[118],"under":[119],"both":[120],"GRPO":[121],"its":[123],"variant":[124],"algorithms.":[126],"delivers":[130],"obvious":[131],"improvements":[132],"medium":[134],"hard":[136],"different":[139],"model":[140,152,165],"sizes,":[141],"achieving":[142],"up":[143],"17.2%":[145],"relative":[146],"in":[148,176],"overall":[149],"where":[151],"capabilities":[153],"are":[154],"most":[155],"stretched.":[156],"These":[157],"results":[158],"validate":[159],"difficulty-aware":[161],"curation":[163],"improves":[164],"tasks,":[169],"providing":[170],"multiple":[171],"insights":[172],"for":[173],"creation":[175],"generation.":[178]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-03-11T00:00:00"}
