{"id":"https://openalex.org/W2894561747","doi":"https://doi.org/10.3906/elk-1906-30","title":"A fast text similarity measure for large document collections using multireferencecosine and genetic algorithm","display_name":"A fast text similarity measure for large document collections using multireferencecosine and genetic algorithm","publication_year":2019,"publication_date":"2019-10-30","ids":{"openalex":"https://openalex.org/W2894561747","doi":"https://doi.org/10.3906/elk-1906-30","mag":"2894561747"},"language":"en","primary_location":{"id":"doi:10.3906/elk-1906-30","is_oa":true,"landing_page_url":"https://doi.org/10.3906/elk-1906-30","pdf_url":"https://doi.org/10.3906/elk-1906-30","source":{"id":"https://openalex.org/S32837994","display_name":"TURKISH JOURNAL OF ELECTRICAL ENGINEERING & COMPUTER SCIENCES","issn_l":"1300-0632","issn":["1300-0632","1303-6203"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310318422","host_organization_name":"Scientific and Technological Research Council of Turkey (TUBITAK)","host_organization_lineage":["https://openalex.org/P4310318422"],"host_organization_lineage_names":["Scientific and Technological Research Council of Turkey (TUBITAK)"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"TURKISH JOURNAL OF ELECTRICAL ENGINEERING &amp; COMPUTER SCIENCES","raw_type":"journal-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://doi.org/10.3906/elk-1906-30","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5053264713","display_name":"Hamid Mohammadi","orcid":"https://orcid.org/0000-0002-8854-9342"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Hamid MOHAMMADI","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5022190009","display_name":"Seyed Hossein Khasteh","orcid":"https://orcid.org/0000-0003-2227-4507"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Seyed Hossein KHASTEH","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5053264713"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.00461897,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"28","issue":"2","first_page":"999","last_page":"1013"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9955000281333923,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8016019463539124},{"id":"https://openalex.org/keywords/cosine-similarity","display_name":"Cosine similarity","score":0.7156105041503906},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.6383814215660095},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.6118060350418091},{"id":"https://openalex.org/keywords/hash-function","display_name":"Hash function","score":0.5841421484947205},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5740846395492554},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5696525573730469},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.4693618416786194},{"id":"https://openalex.org/keywords/measure","display_name":"Measure (data warehouse)","score":0.43304839730262756},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.41683003306388855},{"id":"https://openalex.org/keywords/swift","display_name":"Swift","score":0.4138805866241455},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.2752622961997986},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.22255954146385193},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.18850335478782654},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.1752784550189972}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8016019463539124},{"id":"https://openalex.org/C2780762811","wikidata":"https://www.wikidata.org/wiki/Q1784941","display_name":"Cosine similarity","level":3,"score":0.7156105041503906},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.6383814215660095},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.6118060350418091},{"id":"https://openalex.org/C99138194","wikidata":"https://www.wikidata.org/wiki/Q183427","display_name":"Hash function","level":2,"score":0.5841421484947205},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5740846395492554},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5696525573730469},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.4693618416786194},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.43304839730262756},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.41683003306388855},{"id":"https://openalex.org/C116188536","wikidata":"https://www.wikidata.org/wiki/Q17118377","display_name":"Swift","level":2,"score":0.4138805866241455},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2752622961997986},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.22255954146385193},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.18850335478782654},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.1752784550189972},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.3906/elk-1906-30","is_oa":true,"landing_page_url":"https://doi.org/10.3906/elk-1906-30","pdf_url":"https://doi.org/10.3906/elk-1906-30","source":{"id":"https://openalex.org/S32837994","display_name":"TURKISH JOURNAL OF ELECTRICAL ENGINEERING & COMPUTER SCIENCES","issn_l":"1300-0632","issn":["1300-0632","1303-6203"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310318422","host_organization_name":"Scientific and Technological Research Council of Turkey (TUBITAK)","host_organization_lineage":["https://openalex.org/P4310318422"],"host_organization_lineage_names":["Scientific and Technological Research Council of Turkey (TUBITAK)"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"TURKISH JOURNAL OF ELECTRICAL ENGINEERING &amp; COMPUTER SCIENCES","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:1810.03102","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1810.03102","pdf_url":"https://arxiv.org/pdf/1810.03102","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:2894561747","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/1810.03102","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"pmh:oai:dergipark.org.tr:article/725084","is_oa":false,"landing_page_url":"https://dergipark.org.tr/tr/pub/tbtkelektrik/issue/53851/725084","pdf_url":null,"source":{"id":"https://openalex.org/S4306401840","display_name":"DergiPark (Istanbul University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67581229","host_organization_name":"Istanbul University","host_organization_lineage":["https://openalex.org/I67581229"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"\\n                                                                    Volume: 28, Issue: 2\\n                                                                                                    999-1013\\n                                                                \\n                            ","raw_type":"info:eu-repo/semantics/article"},{"id":"doi:10.48550/arxiv.1810.03102","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1810.03102","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.3906/elk-1906-30","is_oa":true,"landing_page_url":"https://doi.org/10.3906/elk-1906-30","pdf_url":"https://doi.org/10.3906/elk-1906-30","source":{"id":"https://openalex.org/S32837994","display_name":"TURKISH JOURNAL OF ELECTRICAL ENGINEERING & COMPUTER SCIENCES","issn_l":"1300-0632","issn":["1300-0632","1303-6203"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310318422","host_organization_name":"Scientific and Technological Research Council of Turkey (TUBITAK)","host_organization_lineage":["https://openalex.org/P4310318422"],"host_organization_lineage_names":["Scientific and Technological Research Council of Turkey (TUBITAK)"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"TURKISH JOURNAL OF ELECTRICAL ENGINEERING &amp; COMPUTER SCIENCES","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2894561747.pdf","grobid_xml":"https://content.openalex.org/works/W2894561747.grobid-xml"},"referenced_works_count":33,"referenced_works":["https://openalex.org/W318559539","https://openalex.org/W1493526108","https://openalex.org/W1596691921","https://openalex.org/W1609518033","https://openalex.org/W1638203394","https://openalex.org/W1986968077","https://openalex.org/W2003274231","https://openalex.org/W2007842132","https://openalex.org/W2012833704","https://openalex.org/W2013761541","https://openalex.org/W2062028408","https://openalex.org/W2065259291","https://openalex.org/W2067432306","https://openalex.org/W2068329940","https://openalex.org/W2085922539","https://openalex.org/W2097184821","https://openalex.org/W2109803107","https://openalex.org/W2110166424","https://openalex.org/W2111295912","https://openalex.org/W2111549955","https://openalex.org/W2127675794","https://openalex.org/W2128859735","https://openalex.org/W2134491992","https://openalex.org/W2145349611","https://openalex.org/W2147717514","https://openalex.org/W2148212498","https://openalex.org/W2151930506","https://openalex.org/W2152565070","https://openalex.org/W2157748587","https://openalex.org/W2317417686","https://openalex.org/W2536715162","https://openalex.org/W2537973009","https://openalex.org/W2894811571"],"related_works":["https://openalex.org/W120868463","https://openalex.org/W2907065508","https://openalex.org/W1977729554","https://openalex.org/W2183634779","https://openalex.org/W2100658491","https://openalex.org/W2106399596","https://openalex.org/W3184394705","https://openalex.org/W1966234817","https://openalex.org/W2485855531","https://openalex.org/W2006373804","https://openalex.org/W2743303805","https://openalex.org/W1625138908","https://openalex.org/W2064143200","https://openalex.org/W2154846261","https://openalex.org/W1522405883","https://openalex.org/W1591455229","https://openalex.org/W2182878341","https://openalex.org/W2894811571","https://openalex.org/W2130583333","https://openalex.org/W3003703209"],"abstract_inverted_index":{"One":[0],"of":[1,72,116,146,158,164],"the":[2,29,129,134,144,162],"important":[3],"factors":[4],"that":[5],"make":[6],"a":[7,14,31,35,80],"search":[8,32],"engine":[9,33],"fast":[10],"and":[11,16,25,37,40,68,95,120,126,136],"accurate":[12],"is":[13,88,91,103,141,161],"concise":[15],"duplicate":[17,24,39],"free":[18],"index.":[19],"In":[20,77],"order":[21],"to":[22,48,84,150,167],"remove":[23],"near-duplicate":[26,41,74],"documents":[27,75],"from":[28],"index,":[30],"needs":[34,96],"swift":[36],"reliable":[38,94],"text":[42,85,107,154],"document":[43,108],"detection":[44,87],"system.":[45],"Traditional":[46],"approaches":[47],"this":[49,78,159],"problem,":[50],"such":[51,110],"as":[52,63,111],"brute":[53],"force":[54],"comparisons":[55],"or":[56],"simple":[57],"hash-based":[58],"algorithms":[59,166],"are":[60,65,69,124],"not":[61,66,70],"suitable":[62],"they":[64],"scalable":[67],"capable":[71],"detecting":[73],"effectively.":[76],"paper,":[79],"new":[81],"signature-based":[82],"approach":[83],"similarity":[86],"introduced":[89],"which":[90],"fast,":[92],"scalable,":[93],"less":[97],"storage":[98],"space.":[99],"The":[100,122,138,156],"proposed":[101,139],"method":[102,140],"examined":[104],"on":[105,143],"popular":[106],"data-sets":[109],"CiteseerX,":[112],"Enron,":[113],"Gold":[114],"Set":[115],"Near-duplicate":[117],"News":[118],"Articles":[119],"etc.":[121],"results":[123],"promising":[125],"comparable":[127],"with":[128],"best":[130],"cutting-edge":[131],"algorithms,":[132],"considering":[133],"accuracy":[135],"performance.":[137],"based":[142],"idea":[145],"using":[147],"reference":[148,170],"texts":[149],"generate":[151,168],"signatures":[152],"for":[153],"documents.":[155],"novelty":[157],"paper":[160],"use":[163],"genetic":[165],"better":[169],"texts.":[171]},"counts_by_year":[],"updated_date":"2026-02-09T09:26:11.010843","created_date":"2025-10-10T00:00:00"}
