{"id":"https://openalex.org/W2793821822","doi":"https://doi.org/10.1109/ialp.2017.8300585","title":"Compiling a text re-use detection corpus from scientific papers with semi-real cases of plagiarism","display_name":"Compiling a text re-use detection corpus from scientific papers with semi-real cases of plagiarism","publication_year":2017,"publication_date":"2017-12-01","ids":{"openalex":"https://openalex.org/W2793821822","doi":"https://doi.org/10.1109/ialp.2017.8300585","mag":"2793821822"},"language":"en","primary_location":{"id":"doi:10.1109/ialp.2017.8300585","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ialp.2017.8300585","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 International Conference on Asian Language Processing (IALP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5001930588","display_name":"Salar Mohtaj","orcid":"https://orcid.org/0000-0002-0032-3833"},"institutions":[{"id":"https://openalex.org/I2802694670","display_name":"Academic Center for Education, Culture and Research","ror":"https://ror.org/0126z4b94","country_code":"IR","type":"nonprofit","lineage":["https://openalex.org/I2802694670"]}],"countries":["IR"],"is_corresponding":false,"raw_author_name":"Salar Mohtaj","raw_affiliation_strings":["ICT Research Institute of ACECR Tehran, Tehran, Iran"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"ICT Research Institute of ACECR Tehran, Tehran, Iran","institution_ids":["https://openalex.org/I2802694670"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073073246","display_name":"Habibollah Asghari","orcid":"https://orcid.org/0000-0001-9660-8595"},"institutions":[{"id":"https://openalex.org/I2802694670","display_name":"Academic Center for Education, Culture and Research","ror":"https://ror.org/0126z4b94","country_code":"IR","type":"nonprofit","lineage":["https://openalex.org/I2802694670"]}],"countries":["IR"],"is_corresponding":false,"raw_author_name":"Habibollah Asghari","raw_affiliation_strings":["ICT Research Institute of ACECR Tehran, Tehran, Iran"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"ICT Research Institute of ACECR Tehran, Tehran, Iran","institution_ids":["https://openalex.org/I2802694670"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5028589615","display_name":"Vahid Zarrabi","orcid":null},"institutions":[{"id":"https://openalex.org/I2802694670","display_name":"Academic Center for Education, Culture and Research","ror":"https://ror.org/0126z4b94","country_code":"IR","type":"nonprofit","lineage":["https://openalex.org/I2802694670"]}],"countries":["IR"],"is_corresponding":false,"raw_author_name":"Vahid Zarrabi","raw_affiliation_strings":["ICT Research Institute of ACECR Tehran, Tehran, Iran"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"ICT Research Institute of ACECR Tehran, Tehran, Iran","institution_ids":["https://openalex.org/I2802694670"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.4277,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.76286612,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"3936","issue":null,"first_page":"227","last_page":"230"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11492","display_name":"Academic integrity and plagiarism","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11492","display_name":"Academic integrity and plagiarism","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9909999966621399,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9800000190734863,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/plagiarism-detection","display_name":"Plagiarism detection","score":0.9282628297805786},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8338984251022339},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.6331043839454651},{"id":"https://openalex.org/keywords/point","display_name":"Point (geometry)","score":0.5800800323486328},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5702943801879883},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5229119658470154},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.4542660117149353},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.45108044147491455},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4140045642852783},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.08232852816581726}],"concepts":[{"id":"https://openalex.org/C2780907237","wikidata":"https://www.wikidata.org/wiki/Q2986238","display_name":"Plagiarism detection","level":2,"score":0.9282628297805786},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8338984251022339},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.6331043839454651},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.5800800323486328},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5702943801879883},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5229119658470154},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.4542660117149353},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.45108044147491455},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4140045642852783},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.08232852816581726},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ialp.2017.8300585","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ialp.2017.8300585","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 International Conference on Asian Language Processing (IALP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":5,"referenced_works":["https://openalex.org/W86855850","https://openalex.org/W1544505227","https://openalex.org/W2011611695","https://openalex.org/W2053017876","https://openalex.org/W6632555176"],"related_works":["https://openalex.org/W3168266056","https://openalex.org/W4287126803","https://openalex.org/W2735162248","https://openalex.org/W2112890327","https://openalex.org/W4250737080","https://openalex.org/W311440260","https://openalex.org/W2945111072","https://openalex.org/W1496191935","https://openalex.org/W1971922616","https://openalex.org/W3145472249"],"abstract_inverted_index":{"Automatic":[0],"plagiarism":[1,25,29,42,65,89,132],"detection":[2,43],"deals":[3],"with":[4,182],"retrieval":[5],"of":[6,9,21,38,55,61,64,98,103,117,131,170,179,190],"reused":[7],"fragment":[8],"texts":[10],"in":[11,46,50],"a":[12,59,88,122,129,135],"document":[13],"and":[14,77,114,159],"finding":[15],"source":[16],"documents.":[17],"Due":[18,72],"to":[19,33,73,86,94,127,148,184],"development":[20],"various":[22],"methods":[23,80,155],"for":[24,66,187],"detection,":[26],"large":[27],"scale":[28],"corpora":[30,44,113],"are":[31,81,107],"needed":[32],"evaluate":[34],"these":[35,74],"methods.":[36],"Despite":[37],"their":[39],"importance,":[40],"few":[41],"developed":[45],"recent":[47],"years,":[48],"especially":[49],"low":[51],"resource":[52],"languages.":[53],"Because":[54],"legal":[56],"issues,":[57],"releasing":[58],"collection":[60,130],"real":[62,96,115,168],"cases":[63,97,116,133,169],"evaluation":[67,174],"purposes":[68],"is":[69,125,139],"not":[70],"ethical.":[71],"limitations,":[75],"simulation":[76],"artificial":[78,160],"based":[79,140,156],"the":[82,162],"two":[83],"main":[84],"approaches":[85,92],"compile":[87],"corpus.":[90,136],"These":[91],"try":[93],"simulate":[95,167],"plagiarism,":[99],"from":[100,145],"different":[101,188],"point":[102],"views.":[104],"However,":[105],"there":[106],"still":[108],"fundamental":[109],"differences":[110],"between":[111],"simulated":[112,158],"plagiarism.":[118],"In":[119],"this":[120],"paper":[121],"semi-real":[123],"approach":[124,138],"proposed":[126,163,180],"create":[128],"as":[134,151],"This":[137],"on":[141,157],"eliminating":[142],"correct":[143],"references":[144],"scientific":[146],"papers":[147],"make":[149],"them":[150],"plagiarized":[152],"passages.":[153],"Unlike":[154],"approaches,":[161],"corpus":[164,181],"can":[165],"correctly":[166],"text":[171],"re-use.":[172],"The":[173],"result":[175],"shows":[176],"high":[177],"accuracy":[178],"respect":[183],"n-gram":[185],"similarity":[186],"ranges":[189],"N.":[191]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2020,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
