{"id":"https://openalex.org/W2767899888","doi":"https://doi.org/10.1145/3132847.3133097","title":"Source Retrieval for Web-Scale Text Reuse Detection","display_name":"Source Retrieval for Web-Scale Text Reuse Detection","publication_year":2017,"publication_date":"2017-11-06","ids":{"openalex":"https://openalex.org/W2767899888","doi":"https://doi.org/10.1145/3132847.3133097","mag":"2767899888"},"language":"en","primary_location":{"id":"doi:10.1145/3132847.3133097","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3132847.3133097","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2017 ACM on Conference on Information and Knowledge Management","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5014322854","display_name":"Matthias Hagen","orcid":"https://orcid.org/0000-0002-9733-2890"},"institutions":[{"id":"https://openalex.org/I51441396","display_name":"Bauhaus-Universit\u00e4t Weimar","ror":"https://ror.org/033bb5z47","country_code":"DE","type":"education","lineage":["https://openalex.org/I51441396"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Matthias Hagen","raw_affiliation_strings":["Bauhaus-Universit\u00e4t Weimar, Weimar, Germany"],"affiliations":[{"raw_affiliation_string":"Bauhaus-Universit\u00e4t Weimar, Weimar, Germany","institution_ids":["https://openalex.org/I51441396"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083712311","display_name":"Martin Potthast","orcid":"https://orcid.org/0000-0003-2451-0665"},"institutions":[{"id":"https://openalex.org/I51441396","display_name":"Bauhaus-Universit\u00e4t Weimar","ror":"https://ror.org/033bb5z47","country_code":"DE","type":"education","lineage":["https://openalex.org/I51441396"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Martin Potthast","raw_affiliation_strings":["Bauhaus-Universit\u00e4t Weimar, Weimar, Germany"],"affiliations":[{"raw_affiliation_string":"Bauhaus-Universit\u00e4t Weimar, Weimar, Germany","institution_ids":["https://openalex.org/I51441396"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038396317","display_name":"Payam Adineh","orcid":null},"institutions":[{"id":"https://openalex.org/I51441396","display_name":"Bauhaus-Universit\u00e4t Weimar","ror":"https://ror.org/033bb5z47","country_code":"DE","type":"education","lineage":["https://openalex.org/I51441396"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Payam Adineh","raw_affiliation_strings":["Bauhaus-Universit\u00e4t Weimar, Weimar, Germany"],"affiliations":[{"raw_affiliation_string":"Bauhaus-Universit\u00e4t Weimar, Weimar, Germany","institution_ids":["https://openalex.org/I51441396"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074768665","display_name":"Ehsan Fatehifar","orcid":null},"institutions":[{"id":"https://openalex.org/I51441396","display_name":"Bauhaus-Universit\u00e4t Weimar","ror":"https://ror.org/033bb5z47","country_code":"DE","type":"education","lineage":["https://openalex.org/I51441396"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Ehsan Fatehifar","raw_affiliation_strings":["Bauhaus-Universit\u00e4t Weimar, Weimar, Germany"],"affiliations":[{"raw_affiliation_string":"Bauhaus-Universit\u00e4t Weimar, Weimar, Germany","institution_ids":["https://openalex.org/I51441396"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5027915931","display_name":"Benno Stein","orcid":"https://orcid.org/0000-0001-9033-2217"},"institutions":[{"id":"https://openalex.org/I51441396","display_name":"Bauhaus-Universit\u00e4t Weimar","ror":"https://ror.org/033bb5z47","country_code":"DE","type":"education","lineage":["https://openalex.org/I51441396"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Benno Stein","raw_affiliation_strings":["Bauhaus-Universit\u00e4t Weimar, Weimar, Germany"],"affiliations":[{"raw_affiliation_string":"Bauhaus-Universit\u00e4t Weimar, Weimar, Germany","institution_ids":["https://openalex.org/I51441396"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5014322854"],"corresponding_institution_ids":["https://openalex.org/I51441396"],"apc_list":null,"apc_paid":null,"fwci":3.3854,"has_fulltext":false,"cited_by_count":12,"citation_normalized_percentile":{"value":0.93859264,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"2091","last_page":"2094"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10286","display_name":"Information Retrieval and Search Behavior","score":0.9937999844551086,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9908999800682068,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8020027875900269},{"id":"https://openalex.org/keywords/reuse","display_name":"Reuse","score":0.6959952116012573},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.6121319532394409},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.5674446225166321},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.5069777965545654},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.06949585676193237}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8020027875900269},{"id":"https://openalex.org/C206588197","wikidata":"https://www.wikidata.org/wiki/Q846574","display_name":"Reuse","level":2,"score":0.6959952116012573},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.6121319532394409},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.5674446225166321},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.5069777965545654},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.06949585676193237},{"id":"https://openalex.org/C548081761","wikidata":"https://www.wikidata.org/wiki/Q180388","display_name":"Waste management","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3132847.3133097","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3132847.3133097","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2017 ACM on Conference on Information and Knowledge Management","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W22306723","https://openalex.org/W1535642915","https://openalex.org/W1974336599","https://openalex.org/W2009571180","https://openalex.org/W2052739481","https://openalex.org/W2053017876","https://openalex.org/W2296042303","https://openalex.org/W2296582998","https://openalex.org/W2328123515","https://openalex.org/W2346609371","https://openalex.org/W2395614926","https://openalex.org/W2395854963","https://openalex.org/W2400223370","https://openalex.org/W2400384985","https://openalex.org/W2400478366","https://openalex.org/W2400796284","https://openalex.org/W2401309779","https://openalex.org/W2405176170","https://openalex.org/W2407028518","https://openalex.org/W2407376471","https://openalex.org/W2408260220","https://openalex.org/W4285719527"],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2384475851","https://openalex.org/W2000444236","https://openalex.org/W2358668433","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W2353602216","https://openalex.org/W2382290278","https://openalex.org/W2478288626"],"abstract_inverted_index":{"The":[0],"first":[1],"step":[2,75],"of":[3,18,64,80,99,116,125],"text":[4,23,48,65],"reuse":[5,66,82],"detection":[6],"addresses":[7],"the":[8,42,51,72,77,117,119],"source":[9,63,73,85],"retrieval":[10,74,86],"problem:":[11],"given":[12],"a":[13,16,34,39,47,81,88,91,104,123,129,134],"suspicious":[14,52],"document,":[15],"set":[17],"candidate":[19],"sources":[20],"from":[21],"which":[22],"might":[24],"have":[25,28],"been":[26],"reused":[27,58],"to":[29,56,111],"be":[30],"retrieved":[31,43,70],"by":[32,95],"querying":[33],"search":[35],"engine.":[36],"Afterwards,":[37],"in":[38,54,103],"second":[40],"step,":[41],"candidates":[44],"run":[45],"through":[46],"alignment":[49],"with":[50],"document":[53],"order":[55],"identify":[57],"passages.":[59],"Obviously,":[60],"any":[61],"true":[62],"that":[67,132],"is":[68,87],"not":[69],"during":[71],"reduces":[76],"overall":[78],"recall":[79,124,135],"detector.":[83],"Hence,":[84],"recall-oriented":[89],"task,":[90],"fact":[92],"ignored":[93],"even":[94],"experts:":[96],"Only":[97],"3":[98],"20":[100],"teams":[101],"participating":[102],"respective":[105],"task":[106],"at":[107],"PAN":[108],"2012-2016":[109],"managed":[110],"find":[112],"more":[113],"than":[114],"half":[115],"sources,":[118],"best":[120],"one":[121],"achieving":[122],"only~0.59.":[126],"We":[127],"propose":[128],"new":[130],"approach":[131],"reaches":[133],"of~0.89---a":[136],"performance":[137],"gain":[138],"of~51%.":[139]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":4},{"year":2018,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
