{"id":"https://openalex.org/W3213706433","doi":"https://doi.org/10.5281/zenodo.3616683","title":"Detecting Cross-Language Plagiarism using Open Knowledge Graphs","display_name":"Detecting Cross-Language Plagiarism using Open Knowledge Graphs","publication_year":2021,"publication_date":"2021-01-21","ids":{"openalex":"https://openalex.org/W3213706433","doi":"https://doi.org/10.5281/zenodo.3616683","mag":"3213706433"},"language":"en","primary_location":{"id":"pmh:oai:figshare.com:article/11814825","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4377196282","display_name":"Figshare","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210132348","host_organization_name":"Figshare (United Kingdom)","host_organization_lineage":["https://openalex.org/I4210132348"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Dataset"},"type":"dataset","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5003840636","display_name":"Johannes Stegm\u00fcller","orcid":"https://orcid.org/0000-0001-5080-1808"},"institutions":[{"id":"https://openalex.org/I167360494","display_name":"University of Wuppertal","ror":"https://ror.org/00613ak93","country_code":"DE","type":"education","lineage":["https://openalex.org/I167360494"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Stegm\u00fcller, Johannes","raw_affiliation_strings":["University of Wuppertal"],"affiliations":[{"raw_affiliation_string":"University of Wuppertal","institution_ids":["https://openalex.org/I167360494"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090295793","display_name":"Fabian Bauer-Marquart","orcid":"https://orcid.org/0000-0001-9312-1706"},"institutions":[{"id":"https://openalex.org/I189712700","display_name":"University of Konstanz","ror":"https://ror.org/0546hnb39","country_code":"DE","type":"education","lineage":["https://openalex.org/I189712700"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Bauer-Marquart, Fabian","raw_affiliation_strings":["University of Konstanz"],"affiliations":[{"raw_affiliation_string":"University of Konstanz","institution_ids":["https://openalex.org/I189712700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060549879","display_name":"Norman Meuschke","orcid":null},"institutions":[{"id":"https://openalex.org/I167360494","display_name":"University of Wuppertal","ror":"https://ror.org/00613ak93","country_code":"DE","type":"education","lineage":["https://openalex.org/I167360494"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Meuschke, Norman","raw_affiliation_strings":["University of Wuppertal"],"affiliations":[{"raw_affiliation_string":"University of Wuppertal","institution_ids":["https://openalex.org/I167360494"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081763922","display_name":"Terry Ruas","orcid":"https://orcid.org/0000-0002-9440-780X"},"institutions":[{"id":"https://openalex.org/I167360494","display_name":"University of Wuppertal","ror":"https://ror.org/00613ak93","country_code":"DE","type":"education","lineage":["https://openalex.org/I167360494"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Ruas, Terry","raw_affiliation_strings":["University of Wuppertal"],"affiliations":[{"raw_affiliation_string":"University of Wuppertal","institution_ids":["https://openalex.org/I167360494"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038664667","display_name":"Moritz Schubotz","orcid":"https://orcid.org/0000-0001-7141-4997"},"institutions":[{"id":"https://openalex.org/I158675288","display_name":"FIZ Karlsruhe \u2013 Leibniz Institute for Information Infrastructure","ror":"https://ror.org/0387prb75","country_code":"DE","type":"nonprofit","lineage":["https://openalex.org/I158675288","https://openalex.org/I315704651"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Schubotz, Moritz","raw_affiliation_strings":["FIZ Karlsruhe"],"affiliations":[{"raw_affiliation_string":"FIZ Karlsruhe","institution_ids":["https://openalex.org/I158675288"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5058837356","display_name":"B\u00e9la Gipp","orcid":"https://orcid.org/0000-0001-6522-3019"},"institutions":[{"id":"https://openalex.org/I167360494","display_name":"University of Wuppertal","ror":"https://ror.org/00613ak93","country_code":"DE","type":"education","lineage":["https://openalex.org/I167360494"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Gipp, Bela","raw_affiliation_strings":["University of Wuppertal"],"affiliations":[{"raw_affiliation_string":"University of Wuppertal","institution_ids":["https://openalex.org/I167360494"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5003840636"],"corresponding_institution_ids":["https://openalex.org/I167360494"],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11492","display_name":"Academic integrity and plagiarism","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11492","display_name":"Academic integrity and plagiarism","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9951000213623047,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.9879999756813049,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5947659611701965},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4151371121406555},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.363261878490448}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5947659611701965},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4151371121406555},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.363261878490448}],"mesh":[],"locations_count":3,"locations":[{"id":"pmh:oai:figshare.com:article/11814825","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4377196282","display_name":"Figshare","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210132348","host_organization_name":"Figshare (United Kingdom)","host_organization_lineage":["https://openalex.org/I4210132348"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Dataset"},{"id":"pmh:oai:zenodo.org:3616683","is_oa":true,"landing_page_url":"https://zenodo.org/record/3616683","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/other"},{"id":"doi:10.5281/zenodo.3616683","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.3616683","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"dataset"}],"best_oa_location":{"id":"pmh:oai:figshare.com:article/11814825","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4377196282","display_name":"Figshare","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210132348","host_organization_name":"Figshare (United Kingdom)","host_organization_lineage":["https://openalex.org/I4210132348"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Dataset"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.8600000143051147,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":57,"referenced_works":["https://openalex.org/W13806629","https://openalex.org/W22168010","https://openalex.org/W58646613","https://openalex.org/W64401354","https://openalex.org/W1537393456","https://openalex.org/W1542647698","https://openalex.org/W1853530259","https://openalex.org/W1974336599","https://openalex.org/W2028776121","https://openalex.org/W2029097226","https://openalex.org/W2038721957","https://openalex.org/W2096194908","https://openalex.org/W2097333193","https://openalex.org/W2120779048","https://openalex.org/W2123442489","https://openalex.org/W2131744502","https://openalex.org/W2131752763","https://openalex.org/W2132167546","https://openalex.org/W2132339004","https://openalex.org/W2133444727","https://openalex.org/W2150155583","https://openalex.org/W2153579005","https://openalex.org/W2171313960","https://openalex.org/W2184135559","https://openalex.org/W2250473257","https://openalex.org/W2250491765","https://openalex.org/W2250539671","https://openalex.org/W2251765408","https://openalex.org/W2277931188","https://openalex.org/W2340908569","https://openalex.org/W2401421013","https://openalex.org/W2466291125","https://openalex.org/W2500036977","https://openalex.org/W2560619722","https://openalex.org/W2576482813","https://openalex.org/W2591547273","https://openalex.org/W2593644299","https://openalex.org/W2626778328","https://openalex.org/W2744377995","https://openalex.org/W2767810198","https://openalex.org/W2783710698","https://openalex.org/W2798619734","https://openalex.org/W2807704784","https://openalex.org/W2945673882","https://openalex.org/W2950133940","https://openalex.org/W2950896470","https://openalex.org/W2951166594","https://openalex.org/W2963403868","https://openalex.org/W2963832429","https://openalex.org/W2964207259","https://openalex.org/W2980420460","https://openalex.org/W2986643679","https://openalex.org/W2988037059","https://openalex.org/W3006227201","https://openalex.org/W3038033387","https://openalex.org/W3099285086","https://openalex.org/W3103959529"],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W2382290278","https://openalex.org/W2478288626","https://openalex.org/W4391913857","https://openalex.org/W2350741829","https://openalex.org/W2530322880"],"abstract_inverted_index":{"Corresponding":[0],"authors:":[1],"Norman":[2],"Meuschke,":[3],"Terry":[4],"Ruas<br>":[5],"Venue:":[6],"TBA":[7],"(under":[8],"review)":[9],"==========================================================================":[10,15,263],"<strong>Source":[11],"code:":[12],"https://github.com/ag-gipp/cl-osa":[13],"</strong>":[14],"<strong>Dataset":[16],"Details</strong>":[17],"<em>ASPEC</em>.":[18],"The":[19,87,135,165,214,331,400],"Asian":[20],"Scientific":[21,441,453],"Paper":[22,442,454],"Excerpt":[23,443,455],"Corpus":[24],"comprises":[25],"excepts":[26],"of":[27,45,58,72,90,109,131,139,161,169,185,198,221,269,301,317,376,405],"scientific":[28],"papers":[29,60,77],"in":[30,93,127,143,157,202,246,273,328,334,379],"Japanese":[31,66,83,279,283,332,458],"that":[32,61,78,175,224],"have":[33],"been":[34],"manually":[35,64,81,259],"translated":[36,63,80],"to":[37,67,84],"English":[38,281,287,291,295,359,363,367,446],"and":[39,52,172,193,227,280,284,288,292,296,336,360,364,368,447,459],"Chinese.":[40,68],"We":[41,117,147,234],"use":[42],"both":[43],"subsets":[44],"the":[46,55,97,106,123,128,132,153,158,162,186,199,203,240,247,308,324,329,347,377,380,408],"ASPEC":[47,439,451],"corpus.":[48,133,163,414],"<em>ASPEC-JC</em><strong>":[49],"</strong>contains":[50],"abstracts":[51,71],"paragraphs":[53,354],"from":[54,65,82,105,122,152,210,239,323,346,382],"main":[56],"text":[57,209],"research":[59,76],"were":[62,79,176,205,225,254],"<em>ASPEC-JE</em>":[69],"contains":[70,137,167],"approx.":[73],"two":[74,383],"million":[75],"English.":[85],"<em>JRC-Acquis</em>.":[86],"corpus":[88,136,166,204,310],"consists":[89,300],"legislative":[91],"texts":[92],"22":[94],"languages,":[95],"which":[96,250],"European":[98,140,145],"Union's":[99],"Joint":[100],"Research":[101],"Centre":[102],"(JRC)":[103],"selected":[104,322,356],"cumulative":[107],"body":[108],"EU":[110],"laws":[111],"(the":[112],"so":[113],"called":[114],"Acquis":[115],"communautaire).":[116],"sampled":[118,149,236],"our":[119,274],"test":[120,150,237],"cases":[121,151,238],"10,000":[124],"document":[125,155,244],"pairs":[126,156,245],"English-French":[129,159],"subset":[130,160],"<em>Europarl</em>.":[134],"transcripts":[138],"Parliament":[141],"proceedings":[142],"21":[144],"languages.":[146],"exclusively":[148,235],"9,443":[154],"<em>PAN-PC-11</em>.":[164],"instances":[168,220,253],"simulated":[170,222,251],"monolingual":[171],"cross-language":[173],"plagiarism":[174,180,223,252],"used":[177,272],"for":[178,249,402],"evaluating":[179],"detection":[181],"methods":[182],"as":[183,411],"part":[184],"workshop":[187],"series":[188],"Plagiarism":[189],"Analysis,":[190],"Authorship":[191],"Identification,":[192],"Near-Duplicate":[194],"Detection":[195],"(PAN).":[196],"Most":[197],"26,939":[200],"documents":[201,215,271,319,378],"created":[206,226,258],"by":[207,231,260],"extracting":[208],"openly":[211],"available":[212],"books.":[213],"are":[216,345,396],"partially":[217],"interspersed":[218],"with":[219],"obfuscated":[228],"automatically":[229],"or":[230,257],"crowdsourced":[232,261],"workers.":[233,262],"2,921":[241],"Spanish-English":[242],"aligned":[243],"corpus,":[248],"either":[255],"machine-generated":[256],"<strong>File":[264],"Structure</strong>":[265],"<strong>[corpus_documents]":[266],"folder</strong>:":[267,351,372],"Corpora":[268],"translation-aligned":[270,303,318,353],"experiments":[275],"composed":[276],"of:":[277],"aspec:":[278],"aspecx:":[282],"Chinese":[285,485],"jrc:":[286,358],"French":[289,293,361,365],"europarl:":[290,362],"pan:":[294,366],"Spanish":[297,369,426],"Each":[298,315],"sub-corpus":[299],"4,000":[302],"files":[304,333],"(2,000":[305],"per":[306],"language);":[307],"entire":[309],"has":[311],"thus":[312],"20,000":[313],"files.<br>":[314],"set":[316],"was":[320],"randomly":[321,355],"original":[325],"datasets":[326,381],"(details":[327],"paper).<br>":[330],"aspec":[335,437],"aspecx":[337,449],"do":[338],"not":[339],"necessarily":[340],"overlap":[341],"even":[342],"though":[343],"they":[344],"same":[348,409],"dataset.":[349],"<strong>[corpus_paragraphs]":[350],"2,000":[352],"from:":[357],"<br>":[370],"<strong>[vectors_documents]":[371],"Average":[373],"vector":[374],"representation":[375],"pre-trained":[384],"models:":[385],"Universal":[386],"Sentence":[387],"Encoder":[388],"-":[389,430,434,438,445,450,457,462,465,468,472,475,478,481,484],"Multilingual":[390],"(USE-ML)":[391],"ConceptNet":[392,421,431],"Numberbatch":[393,422],"Two":[394],"granularities":[395],"provided:":[397],"vector_paragraphs":[398],"vector_documents":[399],"structure":[401],"each":[403],"level":[404],"granularity":[406],"follows":[407],"pattern":[410],"their":[412],"respective":[413],"<strong>Naming":[415],"convention</strong>:":[416],"&lt;model&gt;_&lt;dataset&gt;_&lt;language&gt;;":[417],"Example:":[418],"cn_jrc_es:":[419],"model:":[420],"corpus:":[423],"JRC-Acquis":[424],"language:":[425],"Labels:":[427],"&lt;model&gt;:<br>":[428],"cn":[429],"Numberbatch<br>":[432],"um":[433],"USE-ML<br>":[435],"&lt;dataset&gt;<br>":[436],"(Asian":[440,452],"Corpus)":[444,456],"Japanese<br>":[448,482],"Chinese<br>":[460],"jrc":[461],"JRC-Acquis<br>":[463],"europarl":[464],"Europarl<br>":[466],"pan":[467],"PAN-PC-11<br>":[469],"&lt;language&gt;<br>":[470],"en":[471],"English<br>":[473],"es":[474],"Spanish<br>":[476],"fr":[477],"French<br>":[479],"ja":[480],"zh":[483]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2022,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
