{"id":"https://openalex.org/W2243493889","doi":"https://doi.org/10.1145/2824864.2824879","title":"On the Importance of Lexicon, Structure and Style for Identifying Source Code Plagiarism","display_name":"On the Importance of Lexicon, Structure and Style for Identifying Source Code Plagiarism","publication_year":2015,"publication_date":"2015-01-01","ids":{"openalex":"https://openalex.org/W2243493889","doi":"https://doi.org/10.1145/2824864.2824879","mag":"2243493889"},"language":"en","primary_location":{"id":"doi:10.1145/2824864.2824879","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2824864.2824879","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Forum for Information Retrieval Evaluation on - FIRE '14","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5021813516","display_name":"Aar\u00f3n Ram\u00edrez-de-la-Cruz","orcid":null},"institutions":[{"id":"https://openalex.org/I200362191","display_name":"Universidad Aut\u00f3noma Metropolitana","ror":"https://ror.org/02kta5139","country_code":"MX","type":"education","lineage":["https://openalex.org/I200362191"]}],"countries":["MX"],"is_corresponding":false,"raw_author_name":"Aar\u00f3n Ram\u00edrez-de-la-Cruz","raw_affiliation_strings":["Departamento de Tecnolog\u00edas de la Informaci\u00f3n, Universidad Aut\u00f3noma Metropolitana, Unidad Cuajimalpa, M\u00e9xico D. F"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Departamento de Tecnolog\u00edas de la Informaci\u00f3n, Universidad Aut\u00f3noma Metropolitana, Unidad Cuajimalpa, M\u00e9xico D. F","institution_ids":["https://openalex.org/I200362191"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058435824","display_name":"Gabriela Ram\u00edrez-de-la-Rosa","orcid":"https://orcid.org/0000-0003-4730-5613"},"institutions":[{"id":"https://openalex.org/I200362191","display_name":"Universidad Aut\u00f3noma Metropolitana","ror":"https://ror.org/02kta5139","country_code":"MX","type":"education","lineage":["https://openalex.org/I200362191"]}],"countries":["MX"],"is_corresponding":false,"raw_author_name":"Gabriela Ram\u00edrez-de-la-Rosa","raw_affiliation_strings":["Departamento de Tecnolog\u00edas de la Informaci\u00f3n, Universidad Aut\u00f3noma Metropolitana, Unidad Cuajimalpa, M\u00e9xico D. F"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Departamento de Tecnolog\u00edas de la Informaci\u00f3n, Universidad Aut\u00f3noma Metropolitana, Unidad Cuajimalpa, M\u00e9xico D. F","institution_ids":["https://openalex.org/I200362191"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089028902","display_name":"Christian S\u00e1nchez-S\u00e1nchez","orcid":"https://orcid.org/0000-0002-5762-4741"},"institutions":[{"id":"https://openalex.org/I200362191","display_name":"Universidad Aut\u00f3noma Metropolitana","ror":"https://ror.org/02kta5139","country_code":"MX","type":"education","lineage":["https://openalex.org/I200362191"]}],"countries":["MX"],"is_corresponding":false,"raw_author_name":"Christian S\u00e1nchez-S\u00e1nchez","raw_affiliation_strings":["Departamento de Tecnolog\u00edas de la Informaci\u00f3n, Universidad Aut\u00f3noma Metropolitana, Unidad Cuajimalpa, M\u00e9xico D. F"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Departamento de Tecnolog\u00edas de la Informaci\u00f3n, Universidad Aut\u00f3noma Metropolitana, Unidad Cuajimalpa, M\u00e9xico D. F","institution_ids":["https://openalex.org/I200362191"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5056716721","display_name":"H\u00e9ctor Jim\u00e9nez-Salazar","orcid":null},"institutions":[{"id":"https://openalex.org/I200362191","display_name":"Universidad Aut\u00f3noma Metropolitana","ror":"https://ror.org/02kta5139","country_code":"MX","type":"education","lineage":["https://openalex.org/I200362191"]}],"countries":["MX"],"is_corresponding":false,"raw_author_name":"H\u00e9ctor Jim\u00e9nez-Salazar","raw_affiliation_strings":["Departamento de Tecnolog\u00edas de la Informaci\u00f3n, Universidad Aut\u00f3noma Metropolitana, Unidad Cuajimalpa, M\u00e9xico D. F"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Departamento de Tecnolog\u00edas de la Informaci\u00f3n, Universidad Aut\u00f3noma Metropolitana, Unidad Cuajimalpa, M\u00e9xico D. F","institution_ids":["https://openalex.org/I200362191"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.15402622,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"31","last_page":"38"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11492","display_name":"Academic integrity and plagiarism","score":0.9933000206947327,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8086990118026733},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.7750002145767212},{"id":"https://openalex.org/keywords/identifier","display_name":"Identifier","score":0.6049266457557678},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.544729471206665},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5443131923675537},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.5320143699645996},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.5173332095146179},{"id":"https://openalex.org/keywords/plagiarism-detection","display_name":"Plagiarism detection","score":0.5022344589233398},{"id":"https://openalex.org/keywords/character-encoding","display_name":"Character encoding","score":0.48652878403663635},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.46514737606048584},{"id":"https://openalex.org/keywords/lexicon","display_name":"Lexicon","score":0.43528324365615845},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.4205673038959503},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3980035185813904},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.39410078525543213},{"id":"https://openalex.org/keywords/character","display_name":"Character (mathematics)","score":0.24966531991958618}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8086990118026733},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.7750002145767212},{"id":"https://openalex.org/C154504017","wikidata":"https://www.wikidata.org/wiki/Q853614","display_name":"Identifier","level":2,"score":0.6049266457557678},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.544729471206665},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5443131923675537},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.5320143699645996},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.5173332095146179},{"id":"https://openalex.org/C2780907237","wikidata":"https://www.wikidata.org/wiki/Q2986238","display_name":"Plagiarism detection","level":2,"score":0.5022344589233398},{"id":"https://openalex.org/C32717103","wikidata":"https://www.wikidata.org/wiki/Q184759","display_name":"Character encoding","level":3,"score":0.48652878403663635},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.46514737606048584},{"id":"https://openalex.org/C2778121359","wikidata":"https://www.wikidata.org/wiki/Q8096","display_name":"Lexicon","level":2,"score":0.43528324365615845},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.4205673038959503},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3980035185813904},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.39410078525543213},{"id":"https://openalex.org/C2780861071","wikidata":"https://www.wikidata.org/wiki/Q1062934","display_name":"Character (mathematics)","level":2,"score":0.24966531991958618},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2824864.2824879","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2824864.2824879","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Forum for Information Retrieval Evaluation on - FIRE '14","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.4699999988079071}],"awards":[],"funders":[{"id":"https://openalex.org/F4320321739","display_name":"Consejo Nacional de Ciencia y Tecnolog\u00eda","ror":"https://ror.org/059ex5q34"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W1507027376","https://openalex.org/W1576226931","https://openalex.org/W1579364623","https://openalex.org/W1660390307","https://openalex.org/W1819770257","https://openalex.org/W1954856259","https://openalex.org/W1965206591","https://openalex.org/W1971922616","https://openalex.org/W2015338694","https://openalex.org/W2015614877","https://openalex.org/W2044348688","https://openalex.org/W2113403290","https://openalex.org/W2146659255","https://openalex.org/W2248990385","https://openalex.org/W2616363589","https://openalex.org/W3215498156","https://openalex.org/W6681648988","https://openalex.org/W6804347171"],"related_works":["https://openalex.org/W3127553152","https://openalex.org/W3002459461","https://openalex.org/W3173165160","https://openalex.org/W3127649977","https://openalex.org/W4287331153","https://openalex.org/W2155491649","https://openalex.org/W4224015079","https://openalex.org/W2100097978","https://openalex.org/W3091649732","https://openalex.org/W1985086575"],"abstract_inverted_index":{"Source":[0],"code":[1,37,133,156,181,190],"plagiarism":[2,178],"can":[3],"be":[4],"identified":[5],"by":[6],"analyzing":[7],"similarities":[8],"of":[9,14,17,27,35,95,113,117,131,144,151,171,174],"several":[10,106],"and":[11,44,91,160],"diverse":[12],"aspects":[13,34],"a":[15,53,84,138,149],"pair":[16],"source":[18,36,85,132,145,155,180,189],"code.":[19],"In":[20],"this":[21],"paper":[22],"we":[23,51,71,123],"present":[24],"three":[25,33],"types":[26,90],"similarity":[28,74,126],"features":[29,127,175],"that":[30,76,183,191],"account":[31,79],"for":[32,61,105,179],"documents,":[38],"particularly:":[39],"i)":[40],"lexical,":[41],"ii)":[42],"structural,":[43],"iii)":[45],"stylistics.":[46],"From":[47],"the":[48,62,68,80,88,92,96,111,169],"lexical":[49],"view,":[50,70],"used":[52],"character":[54],"3-gram":[55],"model":[56],"without":[57],"considering":[58],"reserved":[59],"words":[60],"programming":[63],"language":[64],"in":[65,134],"revision.":[66],"For":[67],"structural":[69],"proposed":[72,124],"two":[73],"metrics":[75],"take":[77],"into":[78],"function's":[81,97],"signatures":[82],"within":[83],"code,":[86,118],"namely":[87],"data":[89],"identifier's":[93],"names":[94],"signature.":[98],"The":[99,166],"third":[100],"view":[101],"consists":[102],"on":[103],"accounting":[104],"stylistics'":[107],"features,":[108],"such":[109,187],"as":[110],"number":[112],"white":[114],"spaces,":[115],"lines":[116],"upper":[119],"letters,":[120],"etc.":[121],"Accordingly,":[122],"8":[125],"to":[128,162,176],"represent":[129],"pairs":[130,143],"order":[135],"to,":[136],"under":[137],"supervised":[139],"approach,":[140],"identify":[141,177],"plagiarized":[142],"codes.":[146],"We":[147],"use":[148],"set":[150,173],"more":[152],"than":[153],"32000":[154],"documents":[157,182],"from":[158],"Java":[159],"C":[161],"perform":[163],"our":[164,172],"experiments.":[165],"results":[167],"show":[168],"pertinence":[170],"satisfy":[184],"particular":[185],"conditions,":[186],"as,":[188],"solve":[192],"difficult":[193],"problems.":[194]},"counts_by_year":[{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":1},{"year":2019,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
