{"id":"https://openalex.org/W7154993647","doi":"https://doi.org/10.48550/arxiv.2604.16058","title":"LLMSniffer: Detecting LLM-Generated Code via GraphCodeBERT and Supervised Contrastive Learning","display_name":"LLMSniffer: Detecting LLM-Generated Code via GraphCodeBERT and Supervised Contrastive Learning","publication_year":2026,"publication_date":"2026-04-17","ids":{"openalex":"https://openalex.org/W7154993647","doi":"https://doi.org/10.48550/arxiv.2604.16058"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.16058","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.16058","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.16058","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5115729339","display_name":"Mahir Labib Dihan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dihan, Mahir Labib","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5116077760","display_name":"Abir Muhtasim","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Muhtasim, Abir","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.4763999879360199,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.4763999879360199,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.09279999881982803,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.05480000004172325,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/preprocessor","display_name":"Preprocessor","score":0.6730999946594238},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5722000002861023},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.5647000074386597},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.552299976348877},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.46790000796318054},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.3921999931335449},{"id":"https://openalex.org/keywords/data-pre-processing","display_name":"Data pre-processing","score":0.38359999656677246}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7782999873161316},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.6730999946594238},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5722000002861023},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.5647000074386597},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.552299976348877},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5260000228881836},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.46790000796318054},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.3921999931335449},{"id":"https://openalex.org/C10551718","wikidata":"https://www.wikidata.org/wiki/Q5227332","display_name":"Data pre-processing","level":2,"score":0.38359999656677246},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3831999897956848},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3765999972820282},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.36660000681877136},{"id":"https://openalex.org/C117447612","wikidata":"https://www.wikidata.org/wiki/Q1412670","display_name":"Software quality","level":4,"score":0.3149999976158142},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.30480000376701355},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.2741999924182892},{"id":"https://openalex.org/C529173508","wikidata":"https://www.wikidata.org/wiki/Q638608","display_name":"Software development","level":3,"score":0.2624000012874603},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.259799987077713}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.16058","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.16058","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.16058","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.16058","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0],"rapid":[1],"proliferation":[2],"of":[3],"Large":[4],"Language":[5],"Models":[6],"(LLMs)":[7],"in":[8],"software":[9,31],"development":[10],"has":[11],"made":[12],"distinguishing":[13],"AI-generated":[14],"code":[15,18,27],"from":[16,77,88],"human-written":[17],"a":[19,36,43,116],"critical":[20],"challenge":[21],"with":[22,50],"implications":[23],"for":[24],"academic":[25],"integrity,":[26],"quality":[28],"assurance,":[29],"and":[30,54,65,87,115],"security.":[32],"We":[33,108],"present":[34],"LLMSniffer,":[35],"detection":[37],"framework":[38],"that":[39,101],"fine-tunes":[40],"GraphCodeBERT":[41],"using":[42],"two-stage":[44],"supervised":[45],"contrastive":[46,102],"learning":[47],"pipeline":[48],"augmented":[49],"comment":[51],"removal":[52],"preprocessing":[53],"an":[55],"MLP":[56],"classifier.":[57],"Evaluated":[58],"on":[59,81,92],"two":[60],"benchmark":[61],"datasets":[62],"-":[63,67],"GPTSniffer":[64,82],"Whodunit":[66,93],"LLMSniffer":[68],"achieves":[69],"substantial":[70],"improvements":[71],"over":[72],"prior":[73],"baselines:":[74],"accuracy":[75],"increases":[76],"70%":[78],"to":[79,85,90,96,120],"78%":[80],"(F1:":[83,94],"68%":[84],"78%)":[86],"91%":[89,95],"94.65%":[91],"94.64%).":[97],"t-SNE":[98],"visualizations":[99],"confirm":[100],"fine-tuning":[103],"yields":[104],"well-separated,":[105],"compact":[106],"embeddings.":[107],"release":[109],"our":[110],"model":[111],"checkpoints,":[112],"datasets,":[113],"codes":[114],"live":[117],"interactive":[118],"demo":[119],"facilitate":[121],"further":[122],"research.":[123]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-21T00:00:00"}
