{"id":"https://openalex.org/W1981470717","doi":"https://doi.org/10.1145/2659532.2659635","title":"Similarity detection among longer texts by matching keywords found in segments","display_name":"Similarity detection among longer texts by matching keywords found in segments","publication_year":2014,"publication_date":"2014-06-27","ids":{"openalex":"https://openalex.org/W1981470717","doi":"https://doi.org/10.1145/2659532.2659635","mag":"1981470717"},"language":"en","primary_location":{"id":"doi:10.1145/2659532.2659635","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2659532.2659635","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 15th International Conference on Computer Systems and Technologies","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5058237783","display_name":"Tom\u00e1\u0161 Ku\u010de\u010dka","orcid":null},"institutions":[{"id":"https://openalex.org/I110757952","display_name":"Slovak University of Technology in Bratislava","ror":"https://ror.org/0561ghm58","country_code":"SK","type":"education","lineage":["https://openalex.org/I110757952"]}],"countries":["SK"],"is_corresponding":true,"raw_author_name":"Tom\u00e1\u0161 Ku\u010de\u010dka","raw_affiliation_strings":["Slovak University of Technology in Bratislava, Slovak Republic"],"affiliations":[{"raw_affiliation_string":"Slovak University of Technology in Bratislava, Slovak Republic","institution_ids":["https://openalex.org/I110757952"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5046586772","display_name":"Daniela Chud\u00e1","orcid":"https://orcid.org/0000-0002-3873-9308"},"institutions":[{"id":"https://openalex.org/I110757952","display_name":"Slovak University of Technology in Bratislava","ror":"https://ror.org/0561ghm58","country_code":"SK","type":"education","lineage":["https://openalex.org/I110757952"]}],"countries":["SK"],"is_corresponding":false,"raw_author_name":"Daniela Chud\u00e1","raw_affiliation_strings":["Slovak University of Technology in Bratislava, Slovak Republic"],"affiliations":[{"raw_affiliation_string":"Slovak University of Technology in Bratislava, Slovak Republic","institution_ids":["https://openalex.org/I110757952"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5058237783"],"corresponding_institution_ids":["https://openalex.org/I110757952"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.05844848,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"142","last_page":"149"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.824536919593811},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.7772775888442993},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6637332439422607},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5985679626464844},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.5310160517692566},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5177021026611328},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5139839053153992},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.5058016777038574},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.43292737007141113},{"id":"https://openalex.org/keywords/the-internet","display_name":"The Internet","score":0.43234843015670776},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.430412232875824},{"id":"https://openalex.org/keywords/slovak","display_name":"Slovak","score":0.41212600469589233},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.19736811518669128},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.15715867280960083},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.15339389443397522},{"id":"https://openalex.org/keywords/czech","display_name":"Czech","score":0.10195863246917725},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.07635420560836792}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.824536919593811},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.7772775888442993},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6637332439422607},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5985679626464844},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.5310160517692566},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5177021026611328},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5139839053153992},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.5058016777038574},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.43292737007141113},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.43234843015670776},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.430412232875824},{"id":"https://openalex.org/C2780102689","wikidata":"https://www.wikidata.org/wiki/Q9058","display_name":"Slovak","level":3,"score":0.41212600469589233},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.19736811518669128},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.15715867280960083},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.15339389443397522},{"id":"https://openalex.org/C2777842544","wikidata":"https://www.wikidata.org/wiki/Q9056","display_name":"Czech","level":2,"score":0.10195863246917725},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.07635420560836792},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2659532.2659635","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2659532.2659635","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 15th International Conference on Computer Systems and Technologies","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.8399999737739563}],"awards":[{"id":"https://openalex.org/G2617358657","display_name":null,"funder_award_id":"VG1/0971/11","funder_id":"https://openalex.org/F4320321779","funder_display_name":"Ministerstvo \u0161kolstva, vedy, v\u00fdskumu a \u0161portu Slovenskej republiky"}],"funders":[{"id":"https://openalex.org/F4320321779","display_name":"Ministerstvo \u0161kolstva, vedy, v\u00fdskumu a \u0161portu Slovenskej republiky","ror":"https://ror.org/044gwpv05"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":9,"referenced_works":["https://openalex.org/W1482854515","https://openalex.org/W1557074680","https://openalex.org/W1568686608","https://openalex.org/W1603526456","https://openalex.org/W1781198478","https://openalex.org/W1979469248","https://openalex.org/W2100626830","https://openalex.org/W2128709346","https://openalex.org/W6603809027"],"related_works":["https://openalex.org/W4213232489","https://openalex.org/W3125406327","https://openalex.org/W2506587716","https://openalex.org/W2474067781","https://openalex.org/W4383571492","https://openalex.org/W2592002436","https://openalex.org/W4205994876","https://openalex.org/W2485712061","https://openalex.org/W2272734367","https://openalex.org/W2327130486"],"abstract_inverted_index":{"Similarity":[0],"detection":[1],"among":[2,53,60,148],"textual":[3,17,81,150],"data":[4,18],"is":[5,92,143,159],"becoming":[6],"more":[7,35],"important":[8,98,106],"with":[9,121],"the":[10,41,125,140,153],"spread":[11],"of":[12,16,23,97,127,156],"Internet":[13],"and":[14,34,130],"growth":[15],"on":[19,48,94,124,167],"it.":[20],"The":[21,87,133],"field":[22],"our":[24,75,122,157,161],"research":[25],"are":[26,45],"long":[27],"texts":[28,55],"as":[29,170],"this":[30,71],"domain":[31],"requires":[32,57],"different":[33],"sophisticated":[36],"approaches":[37],"when":[38],"compared":[39],"to":[40,79],"standard":[42],"methods":[43],"that":[44,63,111,139],"well":[46],"working":[47],"shorter":[49],"texts.":[50,69],"Identifying":[51],"similarity":[52,59,147],"longer":[54,149],"usually":[56],"identifying":[58],"smaller":[61],"segments":[62,110],"can":[64,163],"be":[65,164],"found":[66],"in":[67,84,100],"these":[68,105],"In":[70],"paper":[72],"we":[73,90,108,135],"propose":[74,91],"own":[76],"approach":[77,123,162],"aimed":[78],"segment":[80],"documents":[82],"written":[83],"natural":[85],"language.":[86],"segmentation":[88],"process":[89],"based":[93],"analysing":[95],"positions":[96],"words":[99,107],"document":[101],"content.":[102],"By":[103],"grouping":[104],"create":[109],"do":[112,114],"or":[113],"not":[115],"overlap.":[116],"We":[117],"carried":[118],"several":[119],"experiments":[120,158],"corpus":[126],"students'":[128],"bachelor":[129],"master":[131],"thesis.":[132],"results":[134],"present":[136],"here":[137],"prove":[138],"proposed":[141],"method":[142],"suitable":[144],"for":[145],"detecting":[146],"documents.":[151],"Although":[152],"target":[154],"language":[155],"Slovak,":[160],"easily":[165],"applied":[166],"other":[168],"languages":[169],"well.":[171]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
