{"id":"https://openalex.org/W4393701518","doi":"https://doi.org/10.5281/zenodo.5159398","title":"Detecting Cross-Language Plagiarism using Open Knowledge Graphs","display_name":"Detecting Cross-Language Plagiarism using Open Knowledge Graphs","publication_year":2020,"publication_date":"2020-08-06","ids":{"openalex":"https://openalex.org/W4393701518","doi":"https://doi.org/10.5281/zenodo.5159398"},"language":"en","primary_location":{"id":"pmh:oai:zenodo.org:5159398","is_oa":true,"landing_page_url":"https://zenodo.org/record/5159398","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/other"},"type":"dataset","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://zenodo.org/record/5159398","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5003840636","display_name":"Johannes Stegm\u00fcller","orcid":"https://orcid.org/0000-0001-5080-1808"},"institutions":[{"id":"https://openalex.org/I167360494","display_name":"University of Wuppertal","ror":"https://ror.org/00613ak93","country_code":"DE","type":"education","lineage":["https://openalex.org/I167360494"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Stegm\u00fcller, Johannes","raw_affiliation_strings":["University of Wuppertal"],"affiliations":[{"raw_affiliation_string":"University of Wuppertal","institution_ids":["https://openalex.org/I167360494"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090295793","display_name":"Fabian Bauer-Marquart","orcid":"https://orcid.org/0000-0001-9312-1706"},"institutions":[{"id":"https://openalex.org/I189712700","display_name":"University of Konstanz","ror":"https://ror.org/0546hnb39","country_code":"DE","type":"education","lineage":["https://openalex.org/I189712700"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Bauer-Marquart, Fabian","raw_affiliation_strings":["University of Konstanz"],"affiliations":[{"raw_affiliation_string":"University of Konstanz","institution_ids":["https://openalex.org/I189712700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060549879","display_name":"Norman Meuschke","orcid":null},"institutions":[{"id":"https://openalex.org/I167360494","display_name":"University of Wuppertal","ror":"https://ror.org/00613ak93","country_code":"DE","type":"education","lineage":["https://openalex.org/I167360494"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Meuschke, Norman","raw_affiliation_strings":["University of Wuppertal"],"affiliations":[{"raw_affiliation_string":"University of Wuppertal","institution_ids":["https://openalex.org/I167360494"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081763922","display_name":"Terry Ruas","orcid":"https://orcid.org/0000-0002-9440-780X"},"institutions":[{"id":"https://openalex.org/I167360494","display_name":"University of Wuppertal","ror":"https://ror.org/00613ak93","country_code":"DE","type":"education","lineage":["https://openalex.org/I167360494"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Ruas, Terry","raw_affiliation_strings":["University of Wuppertal"],"affiliations":[{"raw_affiliation_string":"University of Wuppertal","institution_ids":["https://openalex.org/I167360494"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038664667","display_name":"Moritz Schubotz","orcid":"https://orcid.org/0000-0001-7141-4997"},"institutions":[{"id":"https://openalex.org/I158675288","display_name":"FIZ Karlsruhe \u2013 Leibniz Institute for Information Infrastructure","ror":"https://ror.org/0387prb75","country_code":"DE","type":"nonprofit","lineage":["https://openalex.org/I158675288","https://openalex.org/I315704651"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Schubotz, Moritz","raw_affiliation_strings":["FIZ Karlsruhe"],"affiliations":[{"raw_affiliation_string":"FIZ Karlsruhe","institution_ids":["https://openalex.org/I158675288"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5058837356","display_name":"B\u00e9la Gipp","orcid":"https://orcid.org/0000-0001-6522-3019"},"institutions":[{"id":"https://openalex.org/I167360494","display_name":"University of Wuppertal","ror":"https://ror.org/00613ak93","country_code":"DE","type":"education","lineage":["https://openalex.org/I167360494"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Gipp, Bela","raw_affiliation_strings":["University of Wuppertal"],"affiliations":[{"raw_affiliation_string":"University of Wuppertal","institution_ids":["https://openalex.org/I167360494"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5003840636"],"corresponding_institution_ids":["https://openalex.org/I167360494"],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11492","display_name":"Academic integrity and plagiarism","score":0.9958999752998352,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11492","display_name":"Academic integrity and plagiarism","score":0.9958999752998352,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5375667810440063},{"id":"https://openalex.org/keywords/knowledge-graph","display_name":"Knowledge graph","score":0.4190177321434021},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3204573392868042},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.23558756709098816}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5375667810440063},{"id":"https://openalex.org/C2987255567","wikidata":"https://www.wikidata.org/wiki/Q33002955","display_name":"Knowledge graph","level":2,"score":0.4190177321434021},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3204573392868042},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.23558756709098816}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:zenodo.org:5159398","is_oa":true,"landing_page_url":"https://zenodo.org/record/5159398","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/other"},{"id":"doi:10.5281/zenodo.5159398","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.5159398","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"dataset"}],"best_oa_location":{"id":"pmh:oai:zenodo.org:5159398","is_oa":true,"landing_page_url":"https://zenodo.org/record/5159398","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/other"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W2382290278","https://openalex.org/W2478288626","https://openalex.org/W4391913857","https://openalex.org/W2350741829","https://openalex.org/W2530322880"],"abstract_inverted_index":{"Corresponding":[0],"authors:":[1],"Norman":[2],"Meuschke,":[3],"Terry":[4],"Ruas<br>":[5],"Venue:":[6],"2nd":[7],"Workshop":[8],"on":[9,25],"Extraction":[10],"and":[11,59,72,192,213,247,300,304,308,312,316,356,426,438],"Evaluation":[12],"of":[13,47,65,78,92,110,129,151,159,181,189,205,218,241,289,321,337,376],"Knowledge":[14],"Entities":[15],"from":[16,74,85,102,125,142,172,230,259,343,366,382],"Scientific":[17,41,420,432],"Documents":[18],"(EEKE2021)<br>":[19],"at":[20],"the":[21,66,75,117,126,143,148,152,173,178,182,206,219,223,260,267,328,344,349,367,377,380],"ACM/IEEE":[22],"Joint":[23,120],"Conference":[24],"Digital":[26],"Libraries":[27],"2021":[28],"(JCDL2021)":[29],"==========================================================================":[30,35,283],"<strong>Source":[31],"code:":[32],"https://github.com/ag-gipp/cl-osa":[33],"</strong>":[34],"<strong>Dataset":[36],"Details</strong>":[37],"<em>ASPEC</em>.":[38],"The":[39,107,155,185,234,351],"Asian":[40],"Paper":[42,421,433],"Excerpt":[43,422,434],"Corpus":[44],"comprises":[45],"excepts":[46],"scientific":[48],"papers":[49,80,97],"in":[50,113,147,163,177,222,266,293,348,354,379],"Japanese":[51,86,103,299,303,352,437],"that":[52,81,98,195,244],"have":[53],"been":[54],"manually":[55,84,101,279],"translated":[56,83,100],"to":[57,87,104],"English":[58,301,307,311,315,425],"Chinese.":[60,88],"We":[61,137,167,254],"use":[62],"both":[63],"subsets":[64],"ASPEC":[67,418,430],"corpus.":[68,153,183],"<em>ASPEC-JC</em><strong>":[69],"</strong>contains":[70],"abstracts":[71,91],"paragraphs":[73],"main":[76],"text":[77,229],"research":[79,96],"were":[82,99,196,225,245,274],"<em>ASPEC-JE</em>":[89],"contains":[90,157,187],"approx.":[93],"two":[94,383],"million":[95],"English.":[105],"<em>JRC-Acquis</em>.":[106],"corpus":[108,156,186,224,330],"consists":[109,320],"legislative":[111],"texts":[112],"22":[114],"languages,":[115],"which":[116,270],"European":[118,160,165],"Union's":[119],"Research":[121],"Centre":[122],"(JRC)":[123],"selected":[124,342],"cumulative":[127],"body":[128],"EU":[130],"laws":[131],"(the":[132],"so":[133],"called":[134],"Acquis":[135],"communautaire).":[136],"sampled":[138,169,256],"our":[139,294],"test":[140,170,257],"cases":[141,171,258],"10,000":[144],"document":[145,175,264],"pairs":[146,176,265],"English-French":[149,179],"subset":[150,180],"<em>Europarl</em>.":[154],"transcripts":[158],"Parliament":[161],"proceedings":[162],"21":[164],"languages.":[166],"exclusively":[168,255],"9,443":[174],"<em>PAN-PC-11</em>.":[184],"instances":[188,240,273],"simulated":[190,242,271],"monolingual":[191],"cross-language":[193],"plagiarism":[194,200,243,272],"used":[197,292],"for":[198,269],"evaluating":[199],"detection":[201],"methods":[202],"as":[203],"part":[204],"workshop":[207],"series":[208],"Plagiarism":[209],"Analysis,":[210],"Authorship":[211],"Identification,":[212],"Near-Duplicate":[214],"Detection":[215],"(PAN).":[216],"Most":[217],"26,939":[220],"documents":[221,235,291,339,378],"created":[226,246,278],"by":[227,251,280],"extracting":[228],"openly":[231],"available":[232],"books.":[233],"are":[236,365],"partially":[237],"interspersed":[238],"with":[239],"obfuscated":[248],"automatically":[249],"or":[250,277],"crowdsourced":[252,281],"workers.":[253,282],"2,921":[261],"Spanish-English":[262],"aligned":[263],"corpus,":[268],"either":[275],"machine-generated":[276],"<strong>File":[284],"Structure</strong>":[285],"<strong>[corpus_documents]":[286],"folder</strong>:":[287,372],"Corpora":[288],"translation-aligned":[290,323,338],"experiments":[295],"composed":[296],"of:":[297],"aspec:":[298],"aspecx:":[302],"Chinese":[305,464],"jrc:":[306],"French":[309,313],"europarl:":[310],"pan:":[314],"Spanish":[317,405],"Each":[318,335],"sub-corpus":[319],"4,000":[322],"files":[324,353],"(2,000":[325],"per":[326],"language);":[327],"entire":[329],"has":[331],"thus":[332],"20,000":[333],"files.<br>":[334],"set":[336],"was":[340],"randomly":[341],"original":[345],"datasets":[346,381],"(details":[347],"paper).<br>":[350],"aspec":[355,416],"aspecx":[357,428],"do":[358],"not":[359],"necessarily":[360],"overlap":[361],"even":[362],"though":[363],"they":[364],"same":[368],"dataset.":[369],"<br>":[370],"<strong>[vectors_documents]":[371],"Average":[373],"vector":[374],"representation":[375],"pre-trained":[384],"models:":[385],"Universal":[386],"Sentence":[387],"Encoder":[388],"-":[389,409,413,417,424,429,436,441,444,447,451,454,457,460,463],"Multilingual":[390],"(USE-ML)":[391],"ConceptNet":[392,400,410],"Numberbatch":[393,401],"<strong>Naming":[394],"convention</strong>:":[395],"&lt;model&gt;_&lt;dataset&gt;_&lt;language&gt;;":[396],"Example:":[397],"cn_jrc_es:":[398],"model:":[399],"corpus:":[402],"JRC-Acquis":[403],"language:":[404],"Labels:":[406],"&lt;model&gt;:<br>":[407],"cn":[408],"Numberbatch<br>":[411],"um":[412],"USE-ML<br>":[414],"&lt;dataset&gt;<br>":[415],"(Asian":[419,431],"Corpus)":[423,435],"Japanese<br>":[427,461],"Chinese<br>":[439],"jrc":[440],"JRC-Acquis<br>":[442],"europarl":[443],"Europarl<br>":[445],"pan":[446],"PAN-PC-11<br>":[448],"&lt;language&gt;<br>":[449],"en":[450],"English<br>":[452],"es":[453],"Spanish<br>":[455],"fr":[456],"French<br>":[458],"ja":[459],"zh":[462]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
