{"id":"https://openalex.org/W2754186199","doi":"https://doi.org/10.4149/cai_2017_4_887","title":"Evaluation and Implementation of n-Gram-Based Algorithm for Fast Text Comparison","display_name":"Evaluation and Implementation of n-Gram-Based Algorithm for Fast Text Comparison","publication_year":2017,"publication_date":"2017-01-01","ids":{"openalex":"https://openalex.org/W2754186199","doi":"https://doi.org/10.4149/cai_2017_4_887","mag":"2754186199"},"language":"en","primary_location":{"id":"doi:10.4149/cai_2017_4_887","is_oa":true,"landing_page_url":"https://doi.org/10.4149/cai_2017_4_887","pdf_url":null,"source":{"id":"https://openalex.org/S4210200093","display_name":"Computing and Informatics","issn_l":"1335-9150","issn":["1335-9150","2585-8807"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computing and Informatics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.4149/cai_2017_4_887","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5045630744","display_name":"Maciej Wielgosz","orcid":"https://orcid.org/0000-0002-4401-2957"},"institutions":[{"id":"https://openalex.org/I686019","display_name":"AGH University of Krakow","ror":"https://ror.org/00bas1c41","country_code":"PL","type":"education","lineage":["https://openalex.org/I686019"]}],"countries":["PL"],"is_corresponding":false,"raw_author_name":"Maciej Wielgosz","raw_affiliation_strings":["AGH University of Science and Technology, Krakow"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"AGH University of Science and Technology, Krakow","institution_ids":["https://openalex.org/I686019"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5098606033","display_name":"Pawe\u0142 Szczepka","orcid":null},"institutions":[{"id":"https://openalex.org/I686019","display_name":"AGH University of Krakow","ror":"https://ror.org/00bas1c41","country_code":"PL","type":"education","lineage":["https://openalex.org/I686019"]}],"countries":["PL"],"is_corresponding":false,"raw_author_name":"Pawe\u0142 Szczepka","raw_affiliation_strings":["AGH University of Science and Technology, Krakow"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"AGH University of Science and Technology, Krakow","institution_ids":["https://openalex.org/I686019"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052669287","display_name":"Pawel Grzegorz Russek","orcid":"https://orcid.org/0000-0003-3858-4278"},"institutions":[{"id":"https://openalex.org/I686019","display_name":"AGH University of Krakow","ror":"https://ror.org/00bas1c41","country_code":"PL","type":"education","lineage":["https://openalex.org/I686019"]}],"countries":["PL"],"is_corresponding":false,"raw_author_name":"Pawe\u0142 Russek","raw_affiliation_strings":["AGH University of Science and Technology, Krakow"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"AGH University of Science and Technology, Krakow","institution_ids":["https://openalex.org/I686019"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082630408","display_name":"E. Jamro","orcid":"https://orcid.org/0000-0003-4632-2470"},"institutions":[{"id":"https://openalex.org/I686019","display_name":"AGH University of Krakow","ror":"https://ror.org/00bas1c41","country_code":"PL","type":"education","lineage":["https://openalex.org/I686019"]}],"countries":["PL"],"is_corresponding":false,"raw_author_name":"Ernest Jamro","raw_affiliation_strings":["AGH University of Science and Technology, Krakow"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"AGH University of Science and Technology, Krakow","institution_ids":["https://openalex.org/I686019"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048697784","display_name":"K. Wiatr","orcid":"https://orcid.org/0000-0001-5959-0277"},"institutions":[{"id":"https://openalex.org/I686019","display_name":"AGH University of Krakow","ror":"https://ror.org/00bas1c41","country_code":"PL","type":"education","lineage":["https://openalex.org/I686019"]}],"countries":["PL"],"is_corresponding":false,"raw_author_name":"Kazimierz Wiatr","raw_affiliation_strings":["AGH University of Science and Technology, Krakow"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"AGH University of Science and Technology, Krakow","institution_ids":["https://openalex.org/I686019"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036847074","display_name":"Marcin Pietro\u0144","orcid":"https://orcid.org/0000-0001-9357-9231"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Marcin Pietro\u0144","raw_affiliation_strings":["ACC Cyfronet AGH, Krakow"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"ACC Cyfronet AGH, Krakow","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5084220744","display_name":"Dominik \u017burek","orcid":"https://orcid.org/0000-0001-5329-1452"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dominik Zurek","raw_affiliation_strings":["ACC Cyfronet AGH, Krakow"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"ACC Cyfronet AGH, Krakow","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.0256,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.9012305,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":"36","issue":"4","first_page":"887","last_page":"907"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T14025","display_name":"Educational Technology and Assessment","score":0.6590999960899353,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T14025","display_name":"Educational Technology and Assessment","score":0.6590999960899353,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.6574000120162964,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8406480550765991},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.6653757095336914},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6210811138153076},{"id":"https://openalex.org/keywords/winnowing","display_name":"Winnowing","score":0.5637722611427307},{"id":"https://openalex.org/keywords/n-gram","display_name":"n-gram","score":0.5240233540534973},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.47188058495521545},{"id":"https://openalex.org/keywords/general-purpose-computing-on-graphics-processing-units","display_name":"General-purpose computing on graphics processing units","score":0.43725132942199707},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.4196890592575073},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.3205527067184448},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.1664946973323822},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.15452569723129272},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.1394103467464447},{"id":"https://openalex.org/keywords/graphics","display_name":"Graphics","score":0.13124385476112366}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8406480550765991},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.6653757095336914},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6210811138153076},{"id":"https://openalex.org/C9396515","wikidata":"https://www.wikidata.org/wiki/Q961751","display_name":"Winnowing","level":2,"score":0.5637722611427307},{"id":"https://openalex.org/C117884012","wikidata":"https://www.wikidata.org/wiki/Q94489","display_name":"n-gram","level":3,"score":0.5240233540534973},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.47188058495521545},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.43725132942199707},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.4196890592575073},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3205527067184448},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.1664946973323822},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.15452569723129272},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.1394103467464447},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.13124385476112366},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.0},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.0},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.4149/cai_2017_4_887","is_oa":true,"landing_page_url":"https://doi.org/10.4149/cai_2017_4_887","pdf_url":null,"source":{"id":"https://openalex.org/S4210200093","display_name":"Computing and Informatics","issn_l":"1335-9150","issn":["1335-9150","2585-8807"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computing and Informatics","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.4149/cai_2017_4_887","is_oa":true,"landing_page_url":"https://doi.org/10.4149/cai_2017_4_887","pdf_url":null,"source":{"id":"https://openalex.org/S4210200093","display_name":"Computing and Informatics","issn_l":"1335-9150","issn":["1335-9150","2585-8807"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computing and Informatics","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2056302143","https://openalex.org/W2380102103","https://openalex.org/W2808839349","https://openalex.org/W4293080099","https://openalex.org/W2383760405","https://openalex.org/W1768142643","https://openalex.org/W2499279132","https://openalex.org/W1974690493","https://openalex.org/W1980160788","https://openalex.org/W2793154256"],"abstract_inverted_index":{"This":[0],"paper":[1,201,243],"presents":[2,203],"a":[3,17,96,111,123,171,178],"study":[4],"of":[5,30,41,48,56,92,114,138,161,181,195,208,227,231,241,247],"an":[6,28],"n-gram-based":[7],"document":[8,260],"comparison":[9,184,198,261],"method.":[10],"The":[11,22,60,72,89,104,118,152,200,215,229,239],"method":[12,90],"is":[13,107,265],"intended":[14],"to":[15,79,84,109,148,157,190,253],"build":[16],"large-scale":[18],"plagiarism":[19],"detection":[20,49,67,150],"system.":[21,262],"work":[23],"focuses":[24],"not":[25],"only":[26],"on":[27,37,234],"efficiency":[29],"the":[31,38,42,57,64,81,115,128,133,139,155,159,162,182,192,196,209,225,232,242,245,255,268],"text":[32,102,183,197],"similarity":[33],"extraction":[34,99],"but":[35],"also":[36,202],"execution":[39,54,87],"performance":[40,160,264],"implemented":[43,122],"algorithms.":[44],"We":[45],"took":[46],"notice":[47],"performance,":[50],"storage":[51],"requirements":[52],"and":[53,69,74,83,100,121,136,167,174,188,205,224,250],"time":[55],"proposed":[58,187],"approach.":[59],"obtained":[61],"results":[62],"show":[63],"trade-offs":[65],"between":[66],"quality":[68],"computational":[70,193],"requirements.":[71],"GPGPU":[73,204],"multi-CPU":[75,206,235],"platforms":[76,236],"were":[77,146],"considered":[78],"implement":[80],"algorithms":[82,210],"achieve":[85],"good":[86],"speed.":[88],"consists":[91],"two":[93],"main":[94],"algorithms:":[95],"document's":[97],"feature":[98],"fast":[101,259],"comparison.":[103],"winnowing":[105,163],"algorithm":[106,164,185,233],"used":[108,147,252],"generate":[110],"compressed":[112],"representation":[113],"analyzed":[116],"documents.":[117],"authors":[119,153,240],"designed":[120],"dedicated":[124],"test":[125],"framework":[126],"for":[127,132,165,170,211,220],"algorithm.":[129],"That":[130],"allowed":[131],"tuning,":[134],"evaluation,":[135],"optimization":[137],"parameters.":[140],"Well-known":[141],"metrics":[142],"(e.g.":[143],"precision,":[144],"recall)":[145],"evaluate":[149],"performance.":[151],"conducted":[154,256],"tests":[156],"determine":[158],"obfuscated":[166],"unobfuscated":[168],"texts":[169],"different":[172,212,221],"window":[173],"n-gram":[175],"size.":[176],"Also,":[177],"simplified":[179],"version":[180],"was":[186,218,237],"evaluated":[189],"reduce":[191],"complexity":[194],"process.":[199],"implementations":[207],"data":[213],"structures.":[214],"implementation":[216],"speed":[217],"tested":[219],"algorithms'":[222],"parameters":[223],"size":[226],"data.":[228],"scalability":[230],"verified.":[238],"provide":[244],"repository":[246],"software":[248],"tools":[249],"programs":[251],"perform":[254],"experiments.he":[257],"appropriate":[258],"Its":[263],"given":[266],"in":[267],"paper.":[269]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":1},{"year":2017,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
