{"id":"https://openalex.org/W4404399250","doi":"https://doi.org/10.48550/arxiv.2411.04704","title":"Distinguishing LLM-generated from Human-written Code by Contrastive Learning","display_name":"Distinguishing LLM-generated from Human-written Code by Contrastive Learning","publication_year":2024,"publication_date":"2024-11-07","ids":{"openalex":"https://openalex.org/W4404399250","doi":"https://doi.org/10.48550/arxiv.2411.04704"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2411.04704","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2411.04704","pdf_url":"https://arxiv.org/pdf/2411.04704","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2411.04704","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100620030","display_name":"Xiaodan Xu","orcid":"https://orcid.org/0000-0002-9650-9156"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Xu, Xiaodan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102737361","display_name":"Chao Ni","orcid":"https://orcid.org/0000-0002-2906-0598"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ni, Chao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100566349","display_name":"Xinrong Guo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guo, Xinrong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086725968","display_name":"Shixian Liu","orcid":"https://orcid.org/0009-0001-1756-3366"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Shaoxuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100696824","display_name":"Jian Wang","orcid":"https://orcid.org/0000-0002-4316-932X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Xiaoya","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100374023","display_name":"Kui Liu","orcid":"https://orcid.org/0000-0003-0145-615X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Kui","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5101577570","display_name":"Xiaohu Yang","orcid":"https://orcid.org/0000-0003-4111-4189"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Xiaohu","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5100620030"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9921000003814697,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9921000003814697,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6121690273284912},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.5878744721412659},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5364077687263489},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.4888255000114441},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.43074941635131836},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.3688396215438843},{"id":"https://openalex.org/keywords/philosophy","display_name":"Philosophy","score":0.10291516780853271}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6121690273284912},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.5878744721412659},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5364077687263489},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.4888255000114441},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.43074941635131836},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.3688396215438843},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.10291516780853271},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2411.04704","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2411.04704","pdf_url":"https://arxiv.org/pdf/2411.04704","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2411.04704","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2411.04704","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2411.04704","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2411.04704","pdf_url":"https://arxiv.org/pdf/2411.04704","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2087396116","display_name":null,"funder_award_id":"China","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2376276132","display_name":null,"funder_award_id":"China","funder_id":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities"},{"id":"https://openalex.org/G3041480715","display_name":null,"funder_award_id":"2022J184","funder_id":"https://openalex.org/F4320332587","funder_display_name":"Natural Science Foundation of Ningbo"},{"id":"https://openalex.org/G3085993365","display_name":null,"funder_award_id":"(Grant No.","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3317480652","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G37568934","display_name":null,"funder_award_id":"Grant","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G391238517","display_name":null,"funder_award_id":", and","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5088726832","display_name":null,"funder_award_id":"62202419","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5249178904","display_name":null,"funder_award_id":"Grant No. 6","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5939423041","display_name":null,"funder_award_id":"Technology","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5994120800","display_name":null,"funder_award_id":"Natural","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7726157001","display_name":null,"funder_award_id":"Grant No.","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7740246774","display_name":null,"funder_award_id":"226-2022-00064","funder_id":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities"},{"id":"https://openalex.org/G8951484681","display_name":null,"funder_award_id":"Grant","funder_id":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322927","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884"},{"id":"https://openalex.org/F4320332587","display_name":"Natural Science Foundation of Ningbo","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335787","display_name":"Fundamental Research Funds for the Central Universities","ror":null},{"id":"https://openalex.org/F4320338464","display_name":"Natural Science Foundation of Zhejiang Province","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4404399250.pdf","grobid_xml":"https://content.openalex.org/works/W4404399250.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4231937131","https://openalex.org/W3188962172","https://openalex.org/W323219885","https://openalex.org/W2063928587","https://openalex.org/W2772917594","https://openalex.org/W1487966966","https://openalex.org/W4312825515","https://openalex.org/W1589342014","https://openalex.org/W4306742369","https://openalex.org/W3204019825"],"abstract_inverted_index":{"Large":[0],"language":[1,74],"models":[2],"(LLMs),":[3],"such":[4,48],"as":[5,49],"ChatGPT":[6],"released":[7],"by":[8,91],"OpenAI,":[9],"have":[10,63],"attracted":[11],"significant":[12],"attention":[13],"from":[14,122,180],"both":[15],"industry":[16],"and":[17,52,58,105,131,142,150,162,174],"academia":[18],"due":[19],"to":[20,24,87],"their":[21,42,184],"demonstrated":[22],"ability":[23],"generate":[25],"high-quality":[26],"content":[27,61,75],"for":[28,71],"various":[29,46],"tasks.":[30],"Despite":[31],"the":[32,78,114,157,166,172],"impressive":[33],"capabilities":[34],"of":[35,81,116,140,165,168,176],"LLMs,":[36],"there":[37],"are":[38,68],"growing":[39],"concerns":[40],"regarding":[41],"potential":[43],"risks":[44],"in":[45],"fields,":[47],"news,":[50],"education,":[51],"software":[53],"engineering.":[54],"Recently,":[55],"several":[56],"commercial":[57],"open-source":[59],"LLM-generated":[60],"detectors":[62],"been":[64],"proposed,":[65],"which,":[66],"however,":[67],"primarily":[69],"designed":[70],"detecting":[72],"natural":[73],"without":[76],"considering":[77],"specific":[79],"characteristics":[80,167],"program":[82],"code.":[83],"This":[84],"paper":[85],"aims":[86],"fill":[88],"this":[89],"gap":[90],"proposing":[92],"a":[93,101,106,128],"novel":[94],"ChatGPT-generated":[95,120,143,169,178,196],"code":[96,121,144,148,153,170,179,182],"detector,":[97],"CodeGPTSensor,":[98],"based":[99],"on":[100,118,156],"contrastive":[102],"learning":[103],"framework":[104],"semantic":[107],"encoder":[108],"built":[109],"with":[110,183],"UniXcoder.":[111],"To":[112],"assess":[113],"effectiveness":[115],"CodeGPTSensor":[117,192],"differentiating":[119],"human-written":[123,141,181],"code,":[124,197],"we":[125],"first":[126],"curate":[127],"large-scale":[129],"Human":[130],"Machine":[132],"comparison":[133],"Corpus":[134],"(HMCorp),":[135],"which":[136],"includes":[137],"550K":[138],"pairs":[139,149],"(i.e.,":[145],"288K":[146],"Python":[147],"222K":[151],"Java":[152],"pairs).":[154],"Based":[155],"HMCorp":[158],"dataset,":[159],"our":[160],"qualitative":[161],"quantitative":[163],"analysis":[164],"reveals":[171],"challenge":[173],"opportunity":[175],"distinguishing":[177],"representative":[185],"features.":[186],"Our":[187],"experimental":[188],"results":[189],"indicate":[190],"that":[191],"can":[193],"effectively":[194],"identify":[195],"outperforming":[198],"all":[199],"selected":[200],"baselines.":[201]},"counts_by_year":[],"updated_date":"2026-04-12T07:58:50.170612","created_date":"2025-10-10T00:00:00"}
