{"id":"https://openalex.org/W4392619070","doi":"https://doi.org/10.1145/3643991.3644926","title":"Whodunit: Classifying Code as Human Authored or GPT-4 Generated - A case study on CodeChef problems","display_name":"Whodunit: Classifying Code as Human Authored or GPT-4 Generated - A case study on CodeChef problems","publication_year":2024,"publication_date":"2024-04-15","ids":{"openalex":"https://openalex.org/W4392619070","doi":"https://doi.org/10.1145/3643991.3644926"},"language":"en","primary_location":{"id":"doi:10.1145/3643991.3644926","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3643991.3644926","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 21st International Conference on Mining Software Repositories","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2403.04013","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5028054171","display_name":"Oseremen Joy Idialu","orcid":"https://orcid.org/0000-0003-3037-4241"},"institutions":[{"id":"https://openalex.org/I151746483","display_name":"University of Waterloo","ror":"https://ror.org/01aff2v68","country_code":"CA","type":"education","lineage":["https://openalex.org/I151746483"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Oseremen Joy Idialu","raw_affiliation_strings":["University of Waterloo, Waterloo, Ontario, Canada"],"raw_orcid":"https://orcid.org/0000-0003-3037-4241","affiliations":[{"raw_affiliation_string":"University of Waterloo, Waterloo, Ontario, Canada","institution_ids":["https://openalex.org/I151746483"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054968221","display_name":"Noble Saji Mathews","orcid":"https://orcid.org/0000-0003-2266-8848"},"institutions":[{"id":"https://openalex.org/I151746483","display_name":"University of Waterloo","ror":"https://ror.org/01aff2v68","country_code":"CA","type":"education","lineage":["https://openalex.org/I151746483"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Noble Saji Mathews","raw_affiliation_strings":["University of Waterloo, Waterloo, Ontario, Canada"],"raw_orcid":"https://orcid.org/0000-0003-2266-8848","affiliations":[{"raw_affiliation_string":"University of Waterloo, Waterloo, Ontario, Canada","institution_ids":["https://openalex.org/I151746483"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087189180","display_name":"Rungroj Maipradit","orcid":"https://orcid.org/0000-0003-4286-9807"},"institutions":[{"id":"https://openalex.org/I151746483","display_name":"University of Waterloo","ror":"https://ror.org/01aff2v68","country_code":"CA","type":"education","lineage":["https://openalex.org/I151746483"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Rungroj Maipradit","raw_affiliation_strings":["University of Waterloo, Waterloo, Ontario, Canada"],"raw_orcid":"https://orcid.org/0000-0003-4286-9807","affiliations":[{"raw_affiliation_string":"University of Waterloo, Waterloo, Ontario, Canada","institution_ids":["https://openalex.org/I151746483"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000279490","display_name":"Joanne M. Atlee","orcid":"https://orcid.org/0000-0002-0760-526X"},"institutions":[{"id":"https://openalex.org/I151746483","display_name":"University of Waterloo","ror":"https://ror.org/01aff2v68","country_code":"CA","type":"education","lineage":["https://openalex.org/I151746483"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Joanne M. Atlee","raw_affiliation_strings":["University of Waterloo, Waterloo, Ontario, Canada"],"raw_orcid":"https://orcid.org/0000-0002-0760-526X","affiliations":[{"raw_affiliation_string":"University of Waterloo, Waterloo, Ontario, Canada","institution_ids":["https://openalex.org/I151746483"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5029916309","display_name":"Meiyappan Nagappan","orcid":"https://orcid.org/0000-0003-4533-4728"},"institutions":[{"id":"https://openalex.org/I151746483","display_name":"University of Waterloo","ror":"https://ror.org/01aff2v68","country_code":"CA","type":"education","lineage":["https://openalex.org/I151746483"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Mei Nagappan","raw_affiliation_strings":["University of Waterloo, Waterloo, Ontario, Canada"],"raw_orcid":"https://orcid.org/0000-0003-4533-4728","affiliations":[{"raw_affiliation_string":"University of Waterloo, Waterloo, Ontario, Canada","institution_ids":["https://openalex.org/I151746483"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5028054171"],"corresponding_institution_ids":["https://openalex.org/I151746483"],"apc_list":null,"apc_paid":null,"fwci":10.7311,"has_fulltext":true,"cited_by_count":15,"citation_normalized_percentile":{"value":0.9814636,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"394","last_page":"406"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9945999979972839,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9945999979972839,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11492","display_name":"Academic integrity and plagiarism","score":0.954200029373169,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.911899983882904,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.7944793701171875},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7873561382293701},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.6418177485466003},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.628619372844696},{"id":"https://openalex.org/keywords/genetic-programming","display_name":"Genetic programming","score":0.6018726229667664},{"id":"https://openalex.org/keywords/stylometry","display_name":"Stylometry","score":0.5561281442642212},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.45514917373657227},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4511045217514038},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.4264160692691803},{"id":"https://openalex.org/keywords/receiver-operating-characteristic","display_name":"Receiver operating characteristic","score":0.4261280298233032},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.25191396474838257}],"concepts":[{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.7944793701171875},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7873561382293701},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.6418177485466003},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.628619372844696},{"id":"https://openalex.org/C110332635","wikidata":"https://www.wikidata.org/wiki/Q629498","display_name":"Genetic programming","level":2,"score":0.6018726229667664},{"id":"https://openalex.org/C11192451","wikidata":"https://www.wikidata.org/wiki/Q2032038","display_name":"Stylometry","level":2,"score":0.5561281442642212},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.45514917373657227},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4511045217514038},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.4264160692691803},{"id":"https://openalex.org/C58471807","wikidata":"https://www.wikidata.org/wiki/Q327120","display_name":"Receiver operating characteristic","level":2,"score":0.4261280298233032},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.25191396474838257},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1145/3643991.3644926","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3643991.3644926","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 21st International Conference on Mining Software Repositories","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2403.04013","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2403.04013","pdf_url":"https://arxiv.org/pdf/2403.04013","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:uwspace.uwaterloo.ca:10012/20384","is_oa":true,"landing_page_url":"http://hdl.handle.net/10012/20384","pdf_url":"https://uwspace.uwaterloo.ca/bitstreams/c546f3d7-9dc1-4d2e-bfa6-298d3c844d5b/download","source":{"id":"https://openalex.org/S4306401661","display_name":"UWSpace (University of Waterloo)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I151746483","host_organization_name":"University of Waterloo","host_organization_lineage":["https://openalex.org/I151746483"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Conference Paper"},{"id":"pmh:doi:10.5281/zenodo.10152237","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"ConferencePaper"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2403.04013","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2403.04013","pdf_url":"https://arxiv.org/pdf/2403.04013","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.75}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4392619070.pdf","grobid_xml":"https://content.openalex.org/works/W4392619070.grobid-xml"},"referenced_works_count":35,"referenced_works":["https://openalex.org/W1964962870","https://openalex.org/W1965556163","https://openalex.org/W1971922616","https://openalex.org/W1976892921","https://openalex.org/W2033020555","https://openalex.org/W2044802063","https://openalex.org/W2055940575","https://openalex.org/W2072164438","https://openalex.org/W2074078922","https://openalex.org/W2092816424","https://openalex.org/W2295598076","https://openalex.org/W2314503383","https://openalex.org/W2581748289","https://openalex.org/W2621743560","https://openalex.org/W2742956140","https://openalex.org/W2887773459","https://openalex.org/W2912148900","https://openalex.org/W2914961791","https://openalex.org/W2920590155","https://openalex.org/W2944943020","https://openalex.org/W2968478220","https://openalex.org/W2981756694","https://openalex.org/W2996517729","https://openalex.org/W3000180866","https://openalex.org/W3000586681","https://openalex.org/W3041456388","https://openalex.org/W3045562919","https://openalex.org/W3134197843","https://openalex.org/W3177186237","https://openalex.org/W4205883273","https://openalex.org/W4211263275","https://openalex.org/W4321162272","https://openalex.org/W4323033785","https://openalex.org/W4328028959","https://openalex.org/W4388954848"],"related_works":["https://openalex.org/W2895461980","https://openalex.org/W2187670843","https://openalex.org/W2027650462","https://openalex.org/W2768755876","https://openalex.org/W2285499887","https://openalex.org/W2155953932","https://openalex.org/W2187360386","https://openalex.org/W2619884807","https://openalex.org/W2266938806","https://openalex.org/W1971758741"],"abstract_inverted_index":{"Artificial":[0],"intelligence":[1],"(AI)":[2],"assistants":[3],"such":[4],"as":[5,61],"GitHub":[6],"Copilot":[7],"and":[8,73,81,91,104,128,145,155,158,184],"ChatGPT,":[9],"built":[10],"on":[11,138,165],"large":[12],"language":[13],"models":[14],"like":[15],"GPT-4,":[16],"are":[17,22,37],"revolutionizing":[18],"how":[19],"programming":[20,143],"tasks":[21],"performed,":[23],"raising":[24],"questions":[25,36],"about":[26],"whether":[27],"code":[28,60,71,172,183],"is":[29,174],"authored":[30],"by":[31,95],"generative":[32],"AI":[33],"models.":[34],"Such":[35],"of":[38,52,69,107,111,131,141],"particular":[39],"interest":[40],"to":[41,76],"educators,":[42],"who":[43],"worry":[44],"that":[45,114,147,171],"these":[46],"tools":[47],"enable":[48],"a":[49,175],"new":[50],"form":[51],"academic":[53],"dishonesty,":[54],"in":[55],"which":[56],"students":[57],"submit":[58],"AI-generated":[59],"their":[62],"work.":[63],"Our":[64,84,97,168],"research":[65],"explores":[66],"the":[67,139,142,159],"viability":[68],"using":[70],"stylometry":[72,173],"machine":[74],"learning":[75],"distinguish":[77],"between":[78,153,180],"GPT-4":[79,181],"generated":[80,94,182],"human-authored":[82,87,185],"code.":[83,186],"dataset":[85],"comprises":[86],"solutions":[88,93],"from":[89],"CodeChef":[90],"AI-authored":[92],"GPT-4.":[96],"classifier":[98,113,137,160],"outperforms":[99],"baselines,":[100],"with":[101,125],"an":[102,126],"F1-score":[103,127],"AUC-ROC":[105,129],"score":[106,130],"0.91.":[108],"A":[109],"variant":[110],"our":[112,136],"excludes":[115],"gameable":[116],"features":[117],"(e.g.,":[118],"empty":[119],"lines,":[120],"whitespace)":[121],"still":[122],"performs":[123],"well":[124],"0.89.":[132],"We":[133],"also":[134],"evaluated":[135],"difficulty":[140],"problem":[144],"found":[146],"there":[148],"was":[149],"almost":[150],"no":[151],"difference":[152],"easier":[154],"intermediate":[156],"problems,":[157],"performed":[161],"only":[162],"slightly":[163],"worse":[164],"harder":[166],"problems.":[167],"study":[169],"shows":[170],"promising":[176],"approach":[177],"for":[178],"distinguishing":[179]},"counts_by_year":[{"year":2026,"cited_by_count":5},{"year":2025,"cited_by_count":8},{"year":2024,"cited_by_count":2}],"updated_date":"2026-05-20T08:49:12.498775","created_date":"2025-10-10T00:00:00"}
