{"id":"https://openalex.org/W7161169501","doi":"https://doi.org/10.1145/3746467.3801525","title":"Detecting AI-Generated Essays: A Hybrid Stylometric and TF-IDF Approach with Cross-Dataset Validation","display_name":"Detecting AI-Generated Essays: A Hybrid Stylometric and TF-IDF Approach with Cross-Dataset Validation","publication_year":2026,"publication_date":"2026-04-23","ids":{"openalex":"https://openalex.org/W7161169501","doi":"https://doi.org/10.1145/3746467.3801525"},"language":null,"primary_location":{"id":"doi:10.1145/3746467.3801525","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3746467.3801525","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 ACM Southeast Conference","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3746467.3801525","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5136185286","display_name":"Darcy Stacy","orcid":"https://orcid.org/0009-0007-3219-5657"},"institutions":[{"id":"https://openalex.org/I110325482","display_name":"Eastern Kentucky University","ror":"https://ror.org/012xks909","country_code":"US","type":"education","lineage":["https://openalex.org/I110325482"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Darcy Stacy","raw_affiliation_strings":["Eastern Kentucky University, Richmond, KY, USA"],"raw_orcid":"https://orcid.org/0009-0007-3219-5657","affiliations":[{"raw_affiliation_string":"Eastern Kentucky University, Richmond, KY, USA","institution_ids":["https://openalex.org/I110325482"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5075210621","display_name":"Lan Kong","orcid":"https://orcid.org/0000-0001-6098-9445"},"institutions":[{"id":"https://openalex.org/I110325482","display_name":"Eastern Kentucky University","ror":"https://ror.org/012xks909","country_code":"US","type":"education","lineage":["https://openalex.org/I110325482"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Lan Kong","raw_affiliation_strings":["Computer Science and Information Technology, Eastern Kentucky University, Richmond, KY, USA"],"raw_orcid":"https://orcid.org/0009-0009-9289-8383","affiliations":[{"raw_affiliation_string":"Computer Science and Information Technology, Eastern Kentucky University, Richmond, KY, USA","institution_ids":["https://openalex.org/I110325482"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5136185286"],"corresponding_institution_ids":["https://openalex.org/I110325482"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.95550321,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"309","last_page":"310"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.9843999743461609,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.9843999743461609,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.0024999999441206455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12262","display_name":"Hate Speech and Cyberbullying Detection","score":0.002300000051036477,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.6482999920845032},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.44769999384880066},{"id":"https://openalex.org/keywords/logistic-regression","display_name":"Logistic regression","score":0.44600000977516174},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.40779998898506165},{"id":"https://openalex.org/keywords/test","display_name":"Test (biology)","score":0.3671000003814697},{"id":"https://openalex.org/keywords/computational-linguistics","display_name":"Computational linguistics","score":0.33180001378059387},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.32899999618530273}],"concepts":[{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.734000027179718},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6527000069618225},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.6482999920845032},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5277000069618225},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.44769999384880066},{"id":"https://openalex.org/C151956035","wikidata":"https://www.wikidata.org/wiki/Q1132755","display_name":"Logistic regression","level":2,"score":0.44600000977516174},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.42089998722076416},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.40779998898506165},{"id":"https://openalex.org/C2777267654","wikidata":"https://www.wikidata.org/wiki/Q3519023","display_name":"Test (biology)","level":2,"score":0.3671000003814697},{"id":"https://openalex.org/C155092808","wikidata":"https://www.wikidata.org/wiki/Q182557","display_name":"Computational linguistics","level":2,"score":0.33180001378059387},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.32899999618530273},{"id":"https://openalex.org/C2779696439","wikidata":"https://www.wikidata.org/wiki/Q7512811","display_name":"Signature (topology)","level":2,"score":0.3264999985694885},{"id":"https://openalex.org/C3020202489","wikidata":"https://www.wikidata.org/wiki/Q2032038","display_name":"Authorship attribution","level":2,"score":0.32580000162124634},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.31349998712539673},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.29499998688697815},{"id":"https://openalex.org/C58471807","wikidata":"https://www.wikidata.org/wiki/Q327120","display_name":"Receiver operating characteristic","level":2,"score":0.2808000147342682},{"id":"https://openalex.org/C83546350","wikidata":"https://www.wikidata.org/wiki/Q1139051","display_name":"Regression","level":2,"score":0.2791000008583069},{"id":"https://openalex.org/C16910744","wikidata":"https://www.wikidata.org/wiki/Q7705759","display_name":"Test data","level":2,"score":0.27880001068115234},{"id":"https://openalex.org/C52001869","wikidata":"https://www.wikidata.org/wiki/Q812530","display_name":"Naive Bayes classifier","level":3,"score":0.26339998841285706}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3746467.3801525","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3746467.3801525","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 ACM Southeast Conference","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3746467.3801525","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3746467.3801525","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 ACM Southeast Conference","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.8273904919624329}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":6,"referenced_works":["https://openalex.org/W2951080837","https://openalex.org/W3034287667","https://openalex.org/W4250089123","https://openalex.org/W4383815588","https://openalex.org/W4390175962","https://openalex.org/W4411534675"],"related_works":[],"abstract_inverted_index":{"The":[0,73],"rise":[1],"of":[2,47],"generative":[3],"AI":[4,98],"has":[5],"created":[6],"new":[7],"challenges":[8],"for":[9,88,94],"academic":[10,101],"integrity.":[11],"This":[12,80],"study":[13],"presents":[14],"a":[15,45,84],"machine":[16],"learning":[17],"framework":[18],"that":[19],"distinguishes":[20],"human":[21],"from":[22],"AI-authored":[23],"essays":[24],"by":[25],"combining":[26],"stylometric":[27],"features":[28],"(like":[29],"sentence":[30],"complexity":[31],"and":[32,41,59,97],"punctuation)":[33],"with":[34,67,91],"lexical":[35],"n-gram":[36],"patterns.":[37],"Using":[38],"logistic":[39],"regression":[40],"SVM":[42,74],"classifiers":[43],"on":[44,56,64],"corpus":[46],"2,750":[48],"essays,":[49],"both":[50],"models":[51],"achieved":[52],"over":[53],"99%":[54],"accuracy":[55,63],"test":[57],"data":[58],"maintained":[60],"above":[61],"90%":[62],"external":[65],"datasets,":[66],"ROC":[68],"AUC":[69],"scores":[70],"approaching":[71],"1.0.":[72],"model":[75],"showed":[76],"marginally":[77],"superior":[78],"performance.":[79],"hybrid":[81],"approach":[82],"offers":[83],"robust,":[85],"adaptable":[86],"solution":[87],"authorship":[89],"attribution":[90],"significant":[92],"implications":[93],"educational":[95],"assessment":[96],"governance":[99],"in":[100],"contexts.":[102]},"counts_by_year":[],"updated_date":"2026-05-15T06:12:33.780692","created_date":"2026-05-15T00:00:00"}
