{"id":"https://openalex.org/W7143529816","doi":"https://doi.org/10.48550/arxiv.2603.26091","title":"Search-Induced Issues in Web-Augmented LLM Code Generation: Detecting and Repairing Error-Inducing Pages","display_name":"Search-Induced Issues in Web-Augmented LLM Code Generation: Detecting and Repairing Error-Inducing Pages","publication_year":2026,"publication_date":"2026-03-27","ids":{"openalex":"https://openalex.org/W7143529816","doi":"https://doi.org/10.48550/arxiv.2603.26091"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.26091","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.26091","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.26091","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5130989867","display_name":"Guoqing Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Wang, Guoqing","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130946185","display_name":"Zeyu Sun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Zeyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130924358","display_name":"Xiaofei Xie","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xie, Xiaofei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130948607","display_name":"Yizhou Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Yizhou","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130997153","display_name":"Yanchao Tan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tan, Yanchao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130956734","display_name":"Yifan Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Yifan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5130979282","display_name":"Dan Hao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hao, Dan","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5130989867"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.7861999869346619,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.7861999869346619,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10743","display_name":"Software Testing and Debugging Techniques","score":0.06300000101327896,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.02539999969303608,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.6431999802589417},{"id":"https://openalex.org/keywords/root-cause","display_name":"Root cause","score":0.5950999855995178},{"id":"https://openalex.org/keywords/implementation","display_name":"Implementation","score":0.5758000016212463},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.558899998664856},{"id":"https://openalex.org/keywords/reliability","display_name":"Reliability (semiconductor)","score":0.4975999891757965},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.4316999912261963},{"id":"https://openalex.org/keywords/safeguard","display_name":"Safeguard","score":0.42730000615119934},{"id":"https://openalex.org/keywords/code-generation","display_name":"Code generation","score":0.4115999937057495},{"id":"https://openalex.org/keywords/failure-mode-and-effects-analysis","display_name":"Failure mode and effects analysis","score":0.3953000009059906}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.75},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.6431999802589417},{"id":"https://openalex.org/C84945661","wikidata":"https://www.wikidata.org/wiki/Q7366567","display_name":"Root cause","level":2,"score":0.5950999855995178},{"id":"https://openalex.org/C26713055","wikidata":"https://www.wikidata.org/wiki/Q245962","display_name":"Implementation","level":2,"score":0.5758000016212463},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.558899998664856},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.5224999785423279},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.4975999891757965},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.4316999912261963},{"id":"https://openalex.org/C2780771206","wikidata":"https://www.wikidata.org/wiki/Q3271761","display_name":"Safeguard","level":2,"score":0.42730000615119934},{"id":"https://openalex.org/C133162039","wikidata":"https://www.wikidata.org/wiki/Q1061077","display_name":"Code generation","level":3,"score":0.4115999937057495},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.41110000014305115},{"id":"https://openalex.org/C66283442","wikidata":"https://www.wikidata.org/wiki/Q1389268","display_name":"Failure mode and effects analysis","level":2,"score":0.3953000009059906},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.38350000977516174},{"id":"https://openalex.org/C171078966","wikidata":"https://www.wikidata.org/wiki/Q111029","display_name":"Root (linguistics)","level":2,"score":0.38040000200271606},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.3774000108242035},{"id":"https://openalex.org/C95713431","wikidata":"https://www.wikidata.org/wiki/Q631425","display_name":"Vulnerability (computing)","level":2,"score":0.3407999873161316},{"id":"https://openalex.org/C48677424","wikidata":"https://www.wikidata.org/wiki/Q6888088","display_name":"Mode (computer interface)","level":2,"score":0.33309999108314514},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.3328000009059906},{"id":"https://openalex.org/C130963320","wikidata":"https://www.wikidata.org/wiki/Q1401207","display_name":"Root cause analysis","level":2,"score":0.33169999718666077},{"id":"https://openalex.org/C137287247","wikidata":"https://www.wikidata.org/wiki/Q1329550","display_name":"Static program analysis","level":4,"score":0.33160001039505005},{"id":"https://openalex.org/C117447612","wikidata":"https://www.wikidata.org/wiki/Q1412670","display_name":"Software quality","level":4,"score":0.3249000012874603},{"id":"https://openalex.org/C22680326","wikidata":"https://www.wikidata.org/wiki/Q7444867","display_name":"Secure coding","level":5,"score":0.32280001044273376},{"id":"https://openalex.org/C150292731","wikidata":"https://www.wikidata.org/wiki/Q1342704","display_name":"Code review","level":5,"score":0.31779998540878296},{"id":"https://openalex.org/C2780378061","wikidata":"https://www.wikidata.org/wiki/Q25351891","display_name":"Service (business)","level":2,"score":0.30889999866485596},{"id":"https://openalex.org/C120936955","wikidata":"https://www.wikidata.org/wiki/Q2155640","display_name":"Empirical research","level":2,"score":0.3010999858379364},{"id":"https://openalex.org/C35578498","wikidata":"https://www.wikidata.org/wiki/Q193424","display_name":"Web service","level":2,"score":0.2865999937057495},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.2761000096797943},{"id":"https://openalex.org/C116537","wikidata":"https://www.wikidata.org/wiki/Q2169973","display_name":"Service provider","level":3,"score":0.2662000060081482},{"id":"https://openalex.org/C2779639559","wikidata":"https://www.wikidata.org/wiki/Q7661178","display_name":"Symbolic execution","level":3,"score":0.26269999146461487},{"id":"https://openalex.org/C509989072","wikidata":"https://www.wikidata.org/wiki/Q15188241","display_name":"Model-driven architecture","level":4,"score":0.26030001044273376},{"id":"https://openalex.org/C2779662365","wikidata":"https://www.wikidata.org/wiki/Q5416694","display_name":"Event (particle physics)","level":2,"score":0.25040000677108765}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.26091","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.26091","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.26091","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.26091","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Web-augmented":[0],"large":[1],"language":[2],"models":[3,18],"(LLMs)":[4],"offer":[5],"promising":[6],"capabilities":[7],"for":[8,201],"automatic":[9],"code":[10,88,156,208],"generation.":[11],"However,":[12],"integrating":[13],"live":[14],"web":[15],"search":[16,60],"exposes":[17],"to":[19,25,76,111,134,176,181],"unreliable":[20],"or":[21,86,153],"malicious":[22],"content,":[23],"leading":[24],"Search-Induced":[26],"Issues":[27],"(SII),":[28],"a":[29,46,122],"novel":[30],"failure":[31],"mode":[32],"in":[33,90,211],"which":[34],"external":[35],"pages":[36],"mislead":[37],"LLMs":[38,73],"into":[39],"producing":[40],"incorrect":[41],"code.":[42],"This":[43],"paper":[44],"presents":[45],"comprehensive":[47],"empirical":[48],"study":[49],"of":[50,55,174,183,205],"the":[51,91,136,187,203],"prevalence":[52],"and":[53,62,139,144,178,196],"impact":[54],"SII":[56,129],"across":[57,186],"three":[58],"commercial":[59],"APIs":[61],"six":[63],"advanced":[64],"LLMs.":[65],"Our":[66,194],"analysis":[67],"reveals":[68],"that":[69,106,125,166],"all":[70],"evaluated":[71,159,188],"web-augmented":[72,114,206],"are":[74],"vulnerable":[75],"SII,":[77],"with":[78,158,170,190],"root":[79,142],"causes":[80],"arising":[81],"from":[82,161],"either":[83,149],"misaligned":[84,151],"specifications":[85],"flawed":[87],"implementations":[89],"searched":[92],"Error-Inducing":[93],"Pages":[94],"(EIPs).":[95],"To":[96],"address":[97],"this":[98],"challenge,":[99],"we":[100],"propose":[101],"Sherlock,":[102],"an":[103,171],"automated":[104],"framework":[105,197],"enables":[107],"LLM":[108],"service":[109],"providers":[110],"proactively":[112],"safeguard":[113],"generation":[115,209],"systems":[116,210],"at":[117],"scale.":[118],"Sherlock":[119,167],"operates":[120],"as":[121],"continuous":[123],"pipeline":[124],"first":[126],"detects":[127],"potential":[128],"instances,":[130],"then":[131],"debugs":[132],"them":[133,147],"identify":[135],"responsible":[137],"EIPs":[138,169],"pinpoint":[140],"their":[141],"causes,":[143],"finally":[145],"repairs":[146,179],"by":[148],"annotating":[150],"content":[152],"replacing":[154],"erroneous":[155],"snippets":[157],"solutions":[160],"trusted":[162],"sources.":[163],"Experiments":[164],"show":[165],"identifies":[168],"F1":[172],"score":[173],"up":[175],"95%":[177],"71%":[180],"100%":[182],"affected":[184],"generations":[185],"models,":[189],"modest":[191],"computational":[192],"overhead.":[193],"findings":[195],"provide":[198],"practical":[199],"guidance":[200],"improving":[202],"reliability":[204],"LLM-based":[207],"real-world":[212],"software":[213],"engineering":[214],"scenarios.":[215]},"counts_by_year":[],"updated_date":"2026-03-31T06:07:48.031334","created_date":"2026-03-31T00:00:00"}
