{"id":"https://openalex.org/W4393472115","doi":"https://doi.org/10.5281/zenodo.5159387","title":"Detecting Cross-Language Plagiarism using Open Knowledge Graphs","display_name":"Detecting Cross-Language Plagiarism using Open Knowledge Graphs","publication_year":2020,"publication_date":"2020-08-01","ids":{"openalex":"https://openalex.org/W4393472115","doi":"https://doi.org/10.5281/zenodo.5159387"},"language":"en","primary_location":{"id":"pmh:oai:zenodo.org:5159387","is_oa":true,"landing_page_url":"https://zenodo.org/record/5159387","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/other"},"type":"dataset","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://zenodo.org/record/5159387","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5003840636","display_name":"Johannes Stegm\u00fcller","orcid":"https://orcid.org/0000-0001-5080-1808"},"institutions":[{"id":"https://openalex.org/I167360494","display_name":"University of Wuppertal","ror":"https://ror.org/00613ak93","country_code":"DE","type":"education","lineage":["https://openalex.org/I167360494"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Stegm\u00fcller, Johannes","raw_affiliation_strings":["University of Wuppertal"],"affiliations":[{"raw_affiliation_string":"University of Wuppertal","institution_ids":["https://openalex.org/I167360494"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090295793","display_name":"Fabian Bauer-Marquart","orcid":"https://orcid.org/0000-0001-9312-1706"},"institutions":[{"id":"https://openalex.org/I189712700","display_name":"University of Konstanz","ror":"https://ror.org/0546hnb39","country_code":"DE","type":"education","lineage":["https://openalex.org/I189712700"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Bauer-Marquart, Fabian","raw_affiliation_strings":["University of Konstanz"],"affiliations":[{"raw_affiliation_string":"University of Konstanz","institution_ids":["https://openalex.org/I189712700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060549879","display_name":"Norman Meuschke","orcid":null},"institutions":[{"id":"https://openalex.org/I167360494","display_name":"University of Wuppertal","ror":"https://ror.org/00613ak93","country_code":"DE","type":"education","lineage":["https://openalex.org/I167360494"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Meuschke, Norman","raw_affiliation_strings":["University of Wuppertal"],"affiliations":[{"raw_affiliation_string":"University of Wuppertal","institution_ids":["https://openalex.org/I167360494"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081763922","display_name":"Terry Ruas","orcid":"https://orcid.org/0000-0002-9440-780X"},"institutions":[{"id":"https://openalex.org/I167360494","display_name":"University of Wuppertal","ror":"https://ror.org/00613ak93","country_code":"DE","type":"education","lineage":["https://openalex.org/I167360494"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Ruas, Terry","raw_affiliation_strings":["University of Wuppertal"],"affiliations":[{"raw_affiliation_string":"University of Wuppertal","institution_ids":["https://openalex.org/I167360494"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038664667","display_name":"Moritz Schubotz","orcid":"https://orcid.org/0000-0001-7141-4997"},"institutions":[{"id":"https://openalex.org/I158675288","display_name":"FIZ Karlsruhe \u2013 Leibniz Institute for Information Infrastructure","ror":"https://ror.org/0387prb75","country_code":"DE","type":"nonprofit","lineage":["https://openalex.org/I158675288","https://openalex.org/I315704651"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Schubotz, Moritz","raw_affiliation_strings":["FIZ Karlsruhe"],"affiliations":[{"raw_affiliation_string":"FIZ Karlsruhe","institution_ids":["https://openalex.org/I158675288"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5058837356","display_name":"B\u00e9la Gipp","orcid":"https://orcid.org/0000-0001-6522-3019"},"institutions":[{"id":"https://openalex.org/I167360494","display_name":"University of Wuppertal","ror":"https://ror.org/00613ak93","country_code":"DE","type":"education","lineage":["https://openalex.org/I167360494"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Gipp, Bela","raw_affiliation_strings":["University of Wuppertal"],"affiliations":[{"raw_affiliation_string":"University of Wuppertal","institution_ids":["https://openalex.org/I167360494"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5003840636"],"corresponding_institution_ids":["https://openalex.org/I167360494"],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11492","display_name":"Academic integrity and plagiarism","score":0.9958999752998352,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11492","display_name":"Academic integrity and plagiarism","score":0.9958999752998352,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5952742695808411},{"id":"https://openalex.org/keywords/knowledge-graph","display_name":"Knowledge graph","score":0.44365620613098145},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2263249158859253}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5952742695808411},{"id":"https://openalex.org/C2987255567","wikidata":"https://www.wikidata.org/wiki/Q33002955","display_name":"Knowledge graph","level":2,"score":0.44365620613098145},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2263249158859253}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:zenodo.org:5159387","is_oa":true,"landing_page_url":"https://zenodo.org/record/5159387","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/other"},{"id":"doi:10.5281/zenodo.5159387","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.5159387","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"dataset"}],"best_oa_location":{"id":"pmh:oai:zenodo.org:5159387","is_oa":true,"landing_page_url":"https://zenodo.org/record/5159387","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/other"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W2382290278","https://openalex.org/W2478288626","https://openalex.org/W4391913857","https://openalex.org/W2350741829","https://openalex.org/W2530322880"],"abstract_inverted_index":{"Corresponding":[0],"authors:":[1],"Norman":[2],"Meuschke,":[3],"Terry":[4],"Ruas<br>":[5],"Venue:":[6],"TBA":[7],"(under":[8],"review)":[9],"==========================================================================":[10,15,263],"<strong>Source":[11],"code:":[12],"https://github.com/ag-gipp/cl-osa":[13],"</strong>":[14],"<strong>Dataset":[16],"Details</strong>":[17],"<em>ASPEC</em>.":[18],"The":[19,87,135,165,214,331],"Asian":[20],"Scientific":[21],"Paper":[22],"Excerpt":[23],"Corpus":[24],"comprises":[25],"excepts":[26],"of":[27,45,58,72,90,109,131,139,161,169,185,198,221,269,301,317],"scientific":[28],"papers":[29,60,77],"in":[30,93,127,143,157,202,246,273,328,334],"Japanese":[31,66,83,279,283,332],"that":[32,61,78,175,224],"have":[33],"been":[34],"manually":[35,64,81,259],"translated":[36,63,80],"to":[37,67,84],"English":[38,281,287,291,295],"and":[39,52,172,193,227,280,284,288,292,296,336],"Chinese.":[40,68],"We":[41,117,147,234],"use":[42],"both":[43],"subsets":[44],"the":[46,55,97,106,123,128,132,153,158,162,186,199,203,240,247,308,324,329,347],"ASPEC":[47],"corpus.":[48,133,163],"<em>ASPEC-JC</em><strong>":[49],"</strong>contains":[50],"abstracts":[51,71],"paragraphs":[53],"from":[54,65,82,105,122,152,210,239,323,346],"main":[56],"text":[57,209],"research":[59,76],"were":[62,79,176,205,225,254],"<em>ASPEC-JE</em>":[69],"contains":[70,137,167],"approx.":[73],"two":[74],"million":[75],"English.":[85],"<em>JRC-Acquis</em>.":[86],"corpus":[88,136,166,204,310],"consists":[89,300],"legislative":[91],"texts":[92],"22":[94],"languages,":[95],"which":[96,250],"European":[98,140,145],"Union's":[99],"Joint":[100],"Research":[101],"Centre":[102],"(JRC)":[103],"selected":[104,322],"cumulative":[107],"body":[108],"EU":[110],"laws":[111],"(the":[112],"so":[113],"called":[114],"Acquis":[115],"communautaire).":[116],"sampled":[118,149,236],"our":[119,274],"test":[120,150,237],"cases":[121,151,238],"10,000":[124],"document":[125,155,244],"pairs":[126,156,245],"English-French":[129,159],"subset":[130,160],"<em>Europarl</em>.":[134],"transcripts":[138],"Parliament":[141],"proceedings":[142],"21":[144],"languages.":[146],"exclusively":[148,235],"9,443":[154],"<em>PAN-PC-11</em>.":[164],"instances":[168,220,253],"simulated":[170,222,251],"monolingual":[171],"cross-language":[173],"plagiarism":[174,180,223,252],"used":[177,272],"for":[178,249],"evaluating":[179],"detection":[181],"methods":[182],"as":[183],"part":[184],"workshop":[187],"series":[188],"Plagiarism":[189],"Analysis,":[190],"Authorship":[191],"Identification,":[192],"Near-Duplicate":[194],"Detection":[195],"(PAN).":[196],"Most":[197],"26,939":[200],"documents":[201,215,271,319],"created":[206,226,258],"by":[207,231,260],"extracting":[208],"openly":[211],"available":[212],"books.":[213],"are":[216,345],"partially":[217],"interspersed":[218],"with":[219],"obfuscated":[228],"automatically":[229],"or":[230,257],"crowdsourced":[232,261],"workers.":[233,262],"2,921":[241],"Spanish-English":[242],"aligned":[243],"corpus,":[248],"either":[255],"machine-generated":[256],"<strong>File":[264],"Structure</strong>":[265],"<strong>[corpus_documents]":[266],"folder</strong>:":[267],"Corpora":[268],"translation-aligned":[270,303,318],"experiments":[275],"composed":[276],"of:":[277],"aspec:":[278],"aspecx:":[282],"Chinese":[285],"jrc:":[286],"French":[289,293],"europarl:":[290],"pan:":[294],"Spanish":[297],"Each":[298,315],"sub-corpus":[299],"4,000":[302],"files":[304,333],"(2,000":[305],"per":[306],"language);":[307],"entire":[309],"has":[311],"thus":[312],"20,000":[313],"files.<br>":[314],"set":[316],"was":[320],"randomly":[321],"original":[325],"datasets":[326],"(details":[327],"paper).<br>":[330],"aspec":[335],"aspecx":[337],"do":[338],"not":[339],"necessarily":[340],"overlap":[341],"even":[342],"though":[343],"they":[344],"same":[348],"dataset.":[349]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
