{"id":"https://openalex.org/W4411450182","doi":"https://doi.org/10.1145/3729390","title":"Demystifying Memorization in LLM-Based Program Repair via a General Hypothesis Testing Framework","display_name":"Demystifying Memorization in LLM-Based Program Repair via a General Hypothesis Testing Framework","publication_year":2025,"publication_date":"2025-06-19","ids":{"openalex":"https://openalex.org/W4411450182","doi":"https://doi.org/10.1145/3729390"},"language":"en","primary_location":{"id":"doi:10.1145/3729390","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3729390","pdf_url":null,"source":{"id":"https://openalex.org/S4404663975","display_name":"Proceedings of the ACM on software engineering.","issn_l":"2994-970X","issn":["2994-970X"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Software Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://ink.library.smu.edu.sg/sis_research/10323","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5111141245","display_name":"Jiaolong Kong","orcid":"https://orcid.org/0009-0001-8248-1981"},"institutions":[{"id":"https://openalex.org/I79891267","display_name":"Singapore Management University","ror":"https://ror.org/050qmg959","country_code":"SG","type":"education","lineage":["https://openalex.org/I79891267"]}],"countries":["SG"],"is_corresponding":true,"raw_author_name":"Jiaolong Kong","raw_affiliation_strings":["Singapore Management University, Singapore, Singapore"],"raw_orcid":"https://orcid.org/0009-0001-8248-1981","affiliations":[{"raw_affiliation_string":"Singapore Management University, Singapore, Singapore","institution_ids":["https://openalex.org/I79891267"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084396416","display_name":"Xiaofei Xie","orcid":"https://orcid.org/0000-0002-1288-6502"},"institutions":[{"id":"https://openalex.org/I79891267","display_name":"Singapore Management University","ror":"https://ror.org/050qmg959","country_code":"SG","type":"education","lineage":["https://openalex.org/I79891267"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Xiaofei Xie","raw_affiliation_strings":["Singapore Management University, Singapore, Singapore"],"raw_orcid":"https://orcid.org/0000-0002-1288-6502","affiliations":[{"raw_affiliation_string":"Singapore Management University, Singapore, Singapore","institution_ids":["https://openalex.org/I79891267"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5045943684","display_name":"Shangqing Liu","orcid":"https://orcid.org/0000-0002-5598-4006"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shangqing Liu","raw_affiliation_strings":["Nanjing University, Nanjing, China"],"raw_orcid":"https://orcid.org/0000-0002-5598-4006","affiliations":[{"raw_affiliation_string":"Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5111141245"],"corresponding_institution_ids":["https://openalex.org/I79891267"],"apc_list":null,"apc_paid":null,"fwci":12.7744,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.9836766,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":"2","issue":"FSE","first_page":"2712","last_page":"2734"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9952999949455261,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9952999949455261,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.9883999824523926,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12423","display_name":"Software Reliability and Analysis Research","score":0.9775000214576721,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/memorization","display_name":"Memorization","score":0.9334211945533752},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.697050154209137},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6591943502426147},{"id":"https://openalex.org/keywords/event","display_name":"Event (particle physics)","score":0.6096839308738708},{"id":"https://openalex.org/keywords/null-hypothesis","display_name":"Null hypothesis","score":0.5946192741394043},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.5417848229408264},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5026252269744873},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4783610701560974},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.46281349658966064},{"id":"https://openalex.org/keywords/cognitive-psychology","display_name":"Cognitive psychology","score":0.2597016990184784},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.22820252180099487},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.17856836318969727},{"id":"https://openalex.org/keywords/econometrics","display_name":"Econometrics","score":0.12272840738296509},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.10599860548973083},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.09418508410453796}],"concepts":[{"id":"https://openalex.org/C30038468","wikidata":"https://www.wikidata.org/wiki/Q4354775","display_name":"Memorization","level":2,"score":0.9334211945533752},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.697050154209137},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6591943502426147},{"id":"https://openalex.org/C2779662365","wikidata":"https://www.wikidata.org/wiki/Q5416694","display_name":"Event (particle physics)","level":2,"score":0.6096839308738708},{"id":"https://openalex.org/C191988596","wikidata":"https://www.wikidata.org/wiki/Q628374","display_name":"Null hypothesis","level":2,"score":0.5946192741394043},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.5417848229408264},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5026252269744873},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4783610701560974},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.46281349658966064},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.2597016990184784},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.22820252180099487},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.17856836318969727},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.12272840738296509},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.10599860548973083},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.09418508410453796},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3729390","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3729390","pdf_url":null,"source":{"id":"https://openalex.org/S4404663975","display_name":"Proceedings of the ACM on software engineering.","issn_l":"2994-970X","issn":["2994-970X"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Software Engineering","raw_type":"journal-article"},{"id":"pmh:oai:ink.library.smu.edu.sg:sis_research-11324","is_oa":true,"landing_page_url":"https://ink.library.smu.edu.sg/sis_research/10323","pdf_url":null,"source":{"id":"https://openalex.org/S4306401925","display_name":"Singapore Management University Institutional Knowledge (InK) (Singapore Management University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I79891267","host_organization_name":"Singapore Management University","host_organization_lineage":["https://openalex.org/I79891267"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"https://dl.acm.org/doi/10.1145/3729390","raw_type":"Conference Proceeding Article"}],"best_oa_location":{"id":"pmh:oai:ink.library.smu.edu.sg:sis_research-11324","is_oa":true,"landing_page_url":"https://ink.library.smu.edu.sg/sis_research/10323","pdf_url":null,"source":{"id":"https://openalex.org/S4306401925","display_name":"Singapore Management University Institutional Knowledge (InK) (Singapore Management University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I79891267","host_organization_name":"Singapore Management University","host_organization_lineage":["https://openalex.org/I79891267"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"https://dl.acm.org/doi/10.1145/3729390","raw_type":"Conference Proceeding Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W89527454","https://openalex.org/W1964888795","https://openalex.org/W2044098639","https://openalex.org/W2077801393","https://openalex.org/W2158195707","https://openalex.org/W4224308101","https://openalex.org/W4285490400","https://openalex.org/W4285827657","https://openalex.org/W4295646147","https://openalex.org/W4301221590","https://openalex.org/W4308643994","https://openalex.org/W4384347367","https://openalex.org/W4385565597","https://openalex.org/W4388505012","https://openalex.org/W4389162688","https://openalex.org/W4391558518","https://openalex.org/W4398785939","https://openalex.org/W4400190916","https://openalex.org/W4400878080","https://openalex.org/W4402443087","https://openalex.org/W4402670101","https://openalex.org/W4404953085","https://openalex.org/W4405602523","https://openalex.org/W6810081322","https://openalex.org/W6870251470"],"related_works":["https://openalex.org/W3093895509","https://openalex.org/W3163481960","https://openalex.org/W2323394100","https://openalex.org/W280704926","https://openalex.org/W2476068070","https://openalex.org/W4323971310","https://openalex.org/W2893372175","https://openalex.org/W4283526844","https://openalex.org/W2787003449","https://openalex.org/W4205490113"],"abstract_inverted_index":{"Large":[0],"Language":[1],"Models":[2],"(LLMs)":[3],"have":[4],"achieved":[5],"remarkable":[6],"success":[7],"in":[8,12,53,86,194,210,244,291,313,355],"various":[9,97],"applications,":[10],"particularly":[11],"code-related":[13],"tasks":[14],"such":[15,148],"as":[16,118,290],"code":[17,187],"generation":[18],"and":[19,182,214,248,252,281,309,352,368],"program":[20,66,196],"repair,":[21],"setting":[22],"new":[23,217],"performance":[24],"benchmarks.":[25],"However,":[26],"the":[27,61,71,78,87,136,140,153,156,191,211,226,241,261,265,292,338,346],"extensive":[28],"use":[29],"of":[30,47,80,89,147,155,225,236,275,300],"large":[31],"training":[32,48,212,227],"corpora":[33],"raises":[34],"concerns":[35],"about":[36],"whether":[37,70],"these":[38],"achievements":[39],"stem":[40],"from":[41],"genuine":[42],"understanding":[43],"or":[44],"mere":[45,369],"memorization":[46,62,116,192,306],"data\u2014a":[49],"question":[50],"often":[51],"overlooked":[52],"current":[54],"research.":[55],"This":[56],"paper":[57],"aims":[58],"to":[59,96,152,174,222,260,286],"study":[60,344],"issue":[63,193],"within":[64],"LLM-based":[65,195,356],"repair":[67,197],"by":[68,75,130],"investigating":[69],"correct":[72,237],"patches":[73,238],"generated":[74],"LLMs":[76],"are":[77,319],"result":[79],"memorization.":[81,161,301,370],"The":[82,145,322,343],"key":[83],"challenge":[84],"lies":[85],"absence":[88],"ground":[90,242],"truth":[91],"for":[92,101,340,349],"confirming":[93],"memorization,":[94],"leading":[95],"ad-hoc":[98],"methods":[99,170,308],"designed":[100],"its":[102],"detection.":[103],"To":[104],"address":[105],"this":[106,164,314],"challenge,":[107],"we":[108,166,303],"first":[109],"propose":[110],"a":[111,119,132,202,216,233,272,297,361],"general":[112,120],"framework":[113,329],"that":[114,139,206,219,232,331],"formalizes":[115],"detection":[117,307],"hypothesis":[121,138,327],"testing":[122,328],"problem,":[123],"where":[124,264],"existing":[125,305],"approaches":[126],"can":[127],"be":[128,223,270,287],"unified":[129],"defining":[131],"low-probability":[133,172],"event":[134,150],"under":[135,325],"null":[137,157],"data":[141],"is":[142,207,220],"not":[143,269,336],"memorized.":[144],"occurrence":[146],"an":[149],"leads":[151],"rejection":[154],"hypothesis,":[158],"indicating":[159,296],"potential":[160,176],"Based":[162],"on":[163,250,279,283],"framework,":[165],"design":[167],"two":[168,199],"specific":[169],"(i.e.,":[171],"events)":[173],"detect":[175],"memorization:":[177],"1)":[178],"basic":[179],"ground-truth":[180],"matching,":[181],"2)":[183],"reassessment":[184],"after":[185,257],"substantial":[186],"mutation.":[188],"We":[189],"investigate":[190],"using":[198],"datasets:":[200],"Defects4J,":[201],"widely":[203],"used":[204],"benchmark":[205],"likely":[208],"included":[209],"data,":[213],"GitBug-Java,":[215],"dataset":[218],"unlikely":[221],"part":[224],"data.":[228],"Our":[229],"findings":[230],"reveal":[231],"significant":[234,258],"portion":[235],"exactly":[239,289],"match":[240],"truths":[243],"Defects4J":[245],"(e.g.,":[246,277,316],"78.83%":[247],"87.42%":[249],"GPT-3.5":[251,280],"CodeLlama-7b,":[253],"respectively).":[254],"Moreover,":[255],"even":[256],"modifications":[259],"buggy":[262],"code,":[263],"original":[266,293],"repairs":[267],"should":[268],"generated,":[271],"considerable":[273],"percentage":[274],"bugs":[276],"81.82%":[278],"88.24%":[282],"CodeLlama-7b)":[284],"continue":[285],"fixed":[288],"bug":[294],"fixes,":[295],"high":[298],"likelihood":[299],"Furthermore,":[302],"evaluate":[304],"demonstrate":[310],"their":[311,332],"ineffectiveness":[312],"context":[315],"most":[317],"AUROCs":[318],"below":[320],"0.5).":[321],"theoretical":[323],"analysis":[324],"our":[326],"shows":[330],"defined":[333],"events":[334],"may":[335],"meet":[337],"requirements":[339],"being":[341],"low-probability.":[342],"highlights":[345],"critical":[347],"need":[348],"more":[350],"robust":[351],"rigorous":[353],"evaluations":[354],"software":[357],"engineering":[358],"research,":[359],"ensuring":[360],"clear":[362],"distinction":[363],"between":[364],"true":[365],"problem-solving":[366],"capabilities":[367]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":2}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
