{"id":"https://openalex.org/W7126273631","doi":"https://doi.org/10.48550/arxiv.2601.21649","title":"SWE-Spot: Building Small Repo-Experts with Repository-Centric Learning","display_name":"SWE-Spot: Building Small Repo-Experts with Repository-Centric Learning","publication_year":2026,"publication_date":"2026-01-29","ids":{"openalex":"https://openalex.org/W7126273631","doi":"https://doi.org/10.48550/arxiv.2601.21649"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2601.21649","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.21649","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2601.21649","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5124360699","display_name":"Jinjun Peng","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Peng, Jinjun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115997593","display_name":"Magnus Saebo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Saebo, Magnus","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124363215","display_name":"Tianjun Zhong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhong, Tianjun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124363777","display_name":"Yi-Jie Cheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cheng, Yi-Jie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124417534","display_name":"Junfeng Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Junfeng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124338081","display_name":"Baishakhi Ray","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ray, Baishakhi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108149744","display_name":"Simin Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Simin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5124421158","display_name":"Yangruibo Ding","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ding, Yangruibo","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5124360699"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.18050000071525574,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.18050000071525574,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.10869999974966049,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.08969999849796295,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6219000220298767},{"id":"https://openalex.org/keywords/coding","display_name":"Coding (social sciences)","score":0.6100999712944031},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.5644999742507935},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.45660001039505005},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.45179998874664307},{"id":"https://openalex.org/keywords/parametric-statistics","display_name":"Parametric statistics","score":0.44749999046325684},{"id":"https://openalex.org/keywords/dimension","display_name":"Dimension (graph theory)","score":0.439300000667572},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.43230000138282776},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.414000004529953}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7447999715805054},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6219000220298767},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.6100999712944031},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.5644999742507935},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.45660001039505005},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.45179998874664307},{"id":"https://openalex.org/C117251300","wikidata":"https://www.wikidata.org/wiki/Q1849855","display_name":"Parametric statistics","level":2,"score":0.44749999046325684},{"id":"https://openalex.org/C33676613","wikidata":"https://www.wikidata.org/wiki/Q13415176","display_name":"Dimension (graph theory)","level":2,"score":0.439300000667572},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.43230000138282776},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4230000078678131},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.414000004529953},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.39969998598098755},{"id":"https://openalex.org/C24574437","wikidata":"https://www.wikidata.org/wiki/Q7135228","display_name":"Parametric model","level":3,"score":0.3682999908924103},{"id":"https://openalex.org/C189474733","wikidata":"https://www.wikidata.org/wiki/Q917912","display_name":"Model building","level":2,"score":0.34459999203681946},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.33149999380111694},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.32109999656677246},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.32100000977516174},{"id":"https://openalex.org/C199519371","wikidata":"https://www.wikidata.org/wiki/Q942695","display_name":"Source lines of code","level":3,"score":0.30570000410079956},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.28459998965263367},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2833999991416931},{"id":"https://openalex.org/C2778571376","wikidata":"https://www.wikidata.org/wiki/Q1355821","display_name":"Frontier","level":2,"score":0.27559998631477356},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.27549999952316284},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.2727000117301941},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.27219998836517334},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.2721000015735626},{"id":"https://openalex.org/C12590798","wikidata":"https://www.wikidata.org/wiki/Q3933199","display_name":"Replication (statistics)","level":2,"score":0.25949999690055847},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.2587999999523163},{"id":"https://openalex.org/C2984074130","wikidata":"https://www.wikidata.org/wiki/Q73539779","display_name":"R package","level":2,"score":0.2524000108242035}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2601.21649","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.21649","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2601.21649","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.21649","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0],"deployment":[1],"of":[2,89,130],"coding":[3,197],"agents":[4],"in":[5],"privacy-sensitive":[6],"and":[7,154,176,191],"resource-constrained":[8],"environments":[9],"drives":[10],"the":[11,34,47,87],"demand":[12],"for":[13,182],"capable":[14],"open-weight":[15,144],"Small":[16],"Language":[17],"Models":[18],"(SLMs).":[19],"However,":[20],"they":[21,32],"suffer":[22],"from":[23],"a":[24,71,90,114,128,189],"fundamental":[25],"capability":[26],"gap:":[27],"unlike":[28],"frontier":[29],"large":[30],"models,":[31],"lack":[33],"inference-time":[35,106],"strong":[36],"generalization":[37],"to":[38,60,101,125,147],"work":[39],"with":[40],"complicated,":[41],"unfamiliar":[42],"codebases.":[43],"We":[44],"identify":[45],"that":[46,74,138,169,181,194],"prevailing":[48],"Task-Centric":[49],"Learning":[50,69],"(TCL)":[51],"paradigm,":[52,111],"which":[53],"scales":[54],"exposure":[55],"across":[56,162],"disparate":[57],"repositories,":[58],"fails":[59],"address":[61],"this":[62,109],"limitation.":[63],"In":[64],"response,":[65],"we":[66,112],"propose":[67],"Repository-Centric":[68,116],"(RCL),":[70],"paradigm":[72],"shift":[73],"prioritizes":[75],"vertical":[76],"repository":[77,186],"depth":[78],"over":[79],"horizontal":[80],"task":[81],"breadth,":[82],"suggesting":[83],"SLMs":[84],"must":[85],"internalize":[86],"\"physics\"":[88],"target":[91],"software":[92],"environment":[93],"through":[94],"parametric":[95],"knowledge":[96],"acquisition,":[97],"rather":[98],"than":[99],"attempting":[100],"recover":[102],"it":[103],"via":[104],"costly":[105],"search.":[107],"Following":[108],"new":[110],"design":[113],"four-unit":[115],"Experience,":[117],"transforming":[118],"static":[119],"codebases":[120],"into":[121],"interactive":[122],"learning":[123],"signals,":[124],"train":[126],"SWE-Spot-4B,":[127],"family":[129],"highly":[131],"compact":[132],"models":[133,145,158],"built":[134],"as":[135],"repo-specialized":[136],"experts":[137],"breaks":[139],"established":[140],"scaling":[141],"trends,":[142],"outperforming":[143],"up":[146],"larger":[148],"(e.g.,":[149,159],"CWM":[150],"by":[151],"Meta,":[152],"Qwen3-Coder-30B)":[153],"surpassing/matching":[155],"efficiency-focused":[156],"commercial":[157],"GPT-4.1-mini,":[160],"GPT-5-nano)":[161],"multiple":[163],"SWE":[164],"tasks.":[165],"Further":[166],"analysis":[167],"reveals":[168],"RCL":[170],"yields":[171],"higher":[172],"training":[173],"sample":[174],"efficiency":[175],"lower":[177],"inference":[178],"costs,":[179],"emphasizing":[180],"building":[183],"efficient":[184],"intelligence,":[185],"mastery":[187],"is":[188],"distinct":[190],"necessary":[192],"dimension":[193],"complements":[195],"general":[196],"capability.":[198]},"counts_by_year":[],"updated_date":"2026-02-01T03:38:14.988550","created_date":"2026-02-01T00:00:00"}
