{"id":"https://openalex.org/W7154403987","doi":"https://doi.org/10.48550/arxiv.2604.10493","title":"SWE-Shepherd: Advancing PRMs for Reinforcing Code Agents","display_name":"SWE-Shepherd: Advancing PRMs for Reinforcing Code Agents","publication_year":2026,"publication_date":"2026-04-12","ids":{"openalex":"https://openalex.org/W7154403987","doi":"https://doi.org/10.48550/arxiv.2604.10493"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.10493","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.10493","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.10493","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5115729339","display_name":"Mahir Labib Dihan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dihan, Mahir Labib","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133621499","display_name":"Md Ashrafur Rahman Khan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Khan, Md Ashrafur Rahman","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.25450000166893005,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.25450000166893005,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10430","display_name":"Software Engineering Techniques and Practices","score":0.0957999974489212,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.06589999794960022,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/heuristics","display_name":"Heuristics","score":0.7382000088691711},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.6202999949455261},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.578000009059906},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5564000010490417},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.4975999891757965},{"id":"https://openalex.org/keywords/interdependence","display_name":"Interdependence","score":0.4726000130176544},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.45570001006126404}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8567000031471252},{"id":"https://openalex.org/C127705205","wikidata":"https://www.wikidata.org/wiki/Q5748245","display_name":"Heuristics","level":2,"score":0.7382000088691711},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.6202999949455261},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.578000009059906},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5564000010490417},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.4975999891757965},{"id":"https://openalex.org/C185874996","wikidata":"https://www.wikidata.org/wiki/Q269699","display_name":"Interdependence","level":2,"score":0.4726000130176544},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.45570001006126404},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.4278999865055084},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.42489999532699585},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.4122999906539917},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.4099999964237213},{"id":"https://openalex.org/C4554734","wikidata":"https://www.wikidata.org/wiki/Q593744","display_name":"Knowledge base","level":2,"score":0.3919000029563904},{"id":"https://openalex.org/C42058472","wikidata":"https://www.wikidata.org/wiki/Q810214","display_name":"Base (topology)","level":2,"score":0.3758000135421753},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3749000132083893},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.36730000376701355},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.335099995136261},{"id":"https://openalex.org/C51929080","wikidata":"https://www.wikidata.org/wiki/Q2425187","display_name":"Codebase","level":3,"score":0.30239999294281006},{"id":"https://openalex.org/C153180980","wikidata":"https://www.wikidata.org/wiki/Q19776675","display_name":"Commit","level":2,"score":0.2791000008583069},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.2615000009536743}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.10493","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.10493","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.10493","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.10493","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Automating":[0],"real-world":[1],"software":[2],"engineering":[3],"tasks":[4],"remains":[5],"challenging":[6],"for":[7,17,93],"large":[8],"language":[9],"model":[10,112],"(LLM)-based":[11],"agents":[12],"due":[13],"to":[14,42,64,88,117],"the":[15,119,126,133],"need":[16],"long-horizon":[18],"reasoning":[19],"over":[20],"large,":[21],"evolving":[22],"codebases":[23],"and":[24,51,69,107,131,151],"making":[25],"consistent":[26],"decisions":[27,137],"across":[28],"interdependent":[29],"actions.":[30,123],"Existing":[31],"approaches":[32],"typically":[33],"rely":[34],"on":[35,59,113,144],"static":[36],"prompting":[37],"strategies":[38],"or":[39],"handcrafted":[40],"heuristics":[41],"select":[43],"actions":[44,130],"such":[45],"as":[46],"code":[47,95],"editing,":[48],"file":[49],"navigation,":[50],"test":[52],"execution,":[53],"but":[54],"they":[55],"lack":[56],"fine-grained":[57],"feedback":[58],"intermediate":[60,122,160],"decisions.":[61],"This":[62],"leads":[63],"inefficient":[65],"exploration,":[66],"error":[67],"propagation,":[68],"brittle":[70],"solution":[71],"trajectories.":[72],"To":[73],"address":[74],"this":[75],"limitation,":[76],"we":[77,101],"propose":[78],"SWE-Shepherd,":[79],"a":[80,109,114],"framework":[81],"that":[82],"introduces":[83],"Process":[84],"Reward":[85],"Models":[86],"(PRMs)":[87],"provide":[89],"dense,":[90],"step-level":[91],"supervision":[92],"repository-level":[94],"agents.":[96],"Using":[97],"trajectories":[98],"from":[99],"SWE-Bench,":[100],"construct":[102],"an":[103],"action-level":[104],"reward":[105,111],"dataset":[106],"train":[108],"lightweight":[110],"base":[115],"LLM":[116],"estimate":[118],"usefulness":[120],"of":[121],"During":[124],"inference,":[125],"PRM":[127],"evaluates":[128],"candidate":[129],"guides":[132],"agent":[134],"toward":[135],"higher-reward":[136],"without":[138],"requiring":[139],"full":[140],"reinforcement":[141],"learning.":[142],"Experiments":[143],"SWE-Bench":[145],"Verified":[146],"demonstrate":[147],"improved":[148],"interaction":[149],"efficiency":[150],"action":[152],"quality,":[153],"while":[154],"also":[155],"highlighting":[156],"challenges":[157],"in":[158],"aligning":[159],"rewards":[161],"with":[162],"final":[163],"task":[164],"success.":[165]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-15T00:00:00"}
