{"id":"https://openalex.org/W7150814085","doi":"https://doi.org/10.48550/arxiv.2604.03113","title":"PAFT: Preservation Aware Fine-Tuning for Minimal-Edit Program Repair","display_name":"PAFT: Preservation Aware Fine-Tuning for Minimal-Edit Program Repair","publication_year":2026,"publication_date":"2026-04-03","ids":{"openalex":"https://openalex.org/W7150814085","doi":"https://doi.org/10.48550/arxiv.2604.03113"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.03113","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.03113","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.03113","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5133020868","display_name":"Boyang Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Yang, Boyang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133024254","display_name":"Zijian Cai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cai, Zijian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133031005","display_name":"Shunfu Jin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jin, Shunfu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Tian, Haoye","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tian, Haoye","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5133020868"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10743","display_name":"Software Testing and Debugging Techniques","score":0.6909000277519226,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10743","display_name":"Software Testing and Debugging Techniques","score":0.6909000277519226,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.09200000017881393,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.03280000016093254,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.6485999822616577},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.6139000058174133},{"id":"https://openalex.org/keywords/suite","display_name":"Suite","score":0.6134999990463257},{"id":"https://openalex.org/keywords/test-suite","display_name":"Test suite","score":0.4690000116825104},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.38839998841285706},{"id":"https://openalex.org/keywords/signal","display_name":"SIGNAL (programming language)","score":0.3264999985694885}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7871999740600586},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.6485999822616577},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.6139000058174133},{"id":"https://openalex.org/C79581498","wikidata":"https://www.wikidata.org/wiki/Q1367530","display_name":"Suite","level":2,"score":0.6134999990463257},{"id":"https://openalex.org/C151552104","wikidata":"https://www.wikidata.org/wiki/Q7705809","display_name":"Test suite","level":4,"score":0.4690000116825104},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.42080000042915344},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.38839998841285706},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.37130001187324524},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.3546999990940094},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.3264999985694885},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.3041999936103821},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2872999906539917},{"id":"https://openalex.org/C1009929","wikidata":"https://www.wikidata.org/wiki/Q179550","display_name":"Software bug","level":3,"score":0.26899999380111694},{"id":"https://openalex.org/C2988963302","wikidata":"https://www.wikidata.org/wiki/Q629206","display_name":"Program code","level":2,"score":0.2533000111579895}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.03113","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.03113","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.03113","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.03113","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.7336533665657043,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large":[0],"language":[1],"models":[2],"(LLMs)":[3],"are":[4,37],"effective":[5],"for":[6,65],"automated":[7],"program":[8,67],"repair,":[9],"but":[10],"plausible":[11,158],"patches":[12,159],"that":[13],"pass":[14],"the":[15],"full":[16],"test":[17],"suite":[18],"often":[19],"rewrite":[20],"more":[21,156],"code":[22],"than":[23],"necessary,":[24],"increasing":[25],"review":[26],"and":[27,53,77,85,92,148],"maintenance":[28],"costs.":[29],"This":[30],"over-editing":[31],"is":[32],"common":[33],"because":[34],"most":[35],"bugs":[36],"localized,":[38,157],"while":[39,106,135],"standard":[40,102],"supervised":[41,103],"fine-tuning":[42,63,104],"provides":[43],"no":[44],"explicit":[45],"signal":[46],"about":[47],"which":[48,54],"tokens":[49],"should":[50,55],"be":[51,56],"preserved":[52],"changed.":[57],"We":[58],"propose":[59],"PAFT,":[60],"a":[61,124],"preservation-aware":[62],"method":[64],"minimal-edit":[66],"repair.":[68],"PAFT":[69,94,120,144],"derives":[70],"token-level":[71],"preservation":[72],"signals":[73],"by":[74,97,112],"aligning":[75],"buggy":[76],"fixed":[78],"code,":[79],"combines":[80],"them":[81],"with":[82,118],"full-sequence":[83],"masking,":[84],"applies":[86],"an":[87],"edit-difficulty":[88],"curriculum.":[89],"Across":[90],"Defects4J":[91,117],"HumanEval-Java,":[93],"improves":[95],"pass@1":[96,130],"up":[98,113],"to":[99,114,133,141],"65.6%":[100],"over":[101],"(StdFT)":[105],"reducing":[107,136],"average":[108],"edit":[109],"distance":[110],"(AED)":[111],"32.6%.":[115],"On":[116],"DeepSeek-Coder-6.7B,":[119],"also":[121],"outperforms":[122],"AdaPatcher,":[123],"strong":[125],"preference-based":[126],"repair":[127],"baseline,":[128],"improving":[129],"from":[131,139],"5.9%":[132],"10.1%":[134],"median":[137],"AED":[138],"61.0":[140],"42.0.":[142],"Overall,":[143],"preserves":[145],"stable":[146],"context":[147],"concentrates":[149],"edits":[150],"on":[151],"faulty":[152],"regions,":[153],"yielding":[154],"smaller,":[155],"without":[160],"inference-time":[161],"search,":[162],"reranking,":[163],"or":[164],"post-processing.":[165]},"counts_by_year":[],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2026-04-07T00:00:00"}
