{"id":"https://openalex.org/W4408770958","doi":"https://doi.org/10.3390/fi17040135","title":"Explainable Identification of Similarities Between Entities for Discovery in Large Text","display_name":"Explainable Identification of Similarities Between Entities for Discovery in Large Text","publication_year":2025,"publication_date":"2025-03-22","ids":{"openalex":"https://openalex.org/W4408770958","doi":"https://doi.org/10.3390/fi17040135"},"language":"en","primary_location":{"id":"doi:10.3390/fi17040135","is_oa":true,"landing_page_url":"https://doi.org/10.3390/fi17040135","pdf_url":"https://www.mdpi.com/1999-5903/17/4/135/pdf?version=1742631057","source":{"id":"https://openalex.org/S34838331","display_name":"Future Internet","issn_l":"1999-5903","issn":["1999-5903"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Future Internet","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/1999-5903/17/4/135/pdf?version=1742631057","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5048910324","display_name":"Akhil Joshi","orcid":"https://orcid.org/0000-0002-3443-876X"},"institutions":[{"id":"https://openalex.org/I189590672","display_name":"Kansas State University","ror":"https://ror.org/05p1j8758","country_code":"US","type":"education","lineage":["https://openalex.org/I189590672"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Akhil Joshi","raw_affiliation_strings":["Department of Computer Science, Kansas State University, Manhattan, KS 66502, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Kansas State University, Manhattan, KS 66502, USA","institution_ids":["https://openalex.org/I189590672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115466009","display_name":"Sai Teja Erukude","orcid":null},"institutions":[{"id":"https://openalex.org/I189590672","display_name":"Kansas State University","ror":"https://ror.org/05p1j8758","country_code":"US","type":"education","lineage":["https://openalex.org/I189590672"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sai Teja Erukude","raw_affiliation_strings":["Department of Computer Science, Kansas State University, Manhattan, KS 66502, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Kansas State University, Manhattan, KS 66502, USA","institution_ids":["https://openalex.org/I189590672"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5042206068","display_name":"Lior Shamir","orcid":"https://orcid.org/0000-0002-6207-1491"},"institutions":[{"id":"https://openalex.org/I189590672","display_name":"Kansas State University","ror":"https://ror.org/05p1j8758","country_code":"US","type":"education","lineage":["https://openalex.org/I189590672"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Lior Shamir","raw_affiliation_strings":["Department of Computer Science, Kansas State University, Manhattan, KS 66502, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Kansas State University, Manhattan, KS 66502, USA","institution_ids":["https://openalex.org/I189590672"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5042206068"],"corresponding_institution_ids":["https://openalex.org/I189590672"],"apc_list":{"value":1400,"currency":"CHF","value_usd":1515},"apc_paid":{"value":1400,"currency":"CHF","value_usd":1515},"fwci":4.8027,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.94033609,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":"17","issue":"4","first_page":"135","last_page":"135"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8952505588531494},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.7027239203453064},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.49543678760528564},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4283183813095093},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.41330718994140625},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.36024636030197144},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.34262654185295105}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8952505588531494},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.7027239203453064},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.49543678760528564},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4283183813095093},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.41330718994140625},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.36024636030197144},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.34262654185295105},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.3390/fi17040135","is_oa":true,"landing_page_url":"https://doi.org/10.3390/fi17040135","pdf_url":"https://www.mdpi.com/1999-5903/17/4/135/pdf?version=1742631057","source":{"id":"https://openalex.org/S34838331","display_name":"Future Internet","issn_l":"1999-5903","issn":["1999-5903"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Future Internet","raw_type":"journal-article"},{"id":"pmh:oai:RePEc:gam:jftint:v:17:y:2025:i:4:p:135-:d:1618120","is_oa":false,"landing_page_url":"https://www.mdpi.com/1999-5903/17/4/135/","pdf_url":null,"source":{"id":"https://openalex.org/S4306401271","display_name":"RePEc: Research Papers in Economics","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I77793887","host_organization_name":"Federal Reserve Bank of St. Louis","host_organization_lineage":["https://openalex.org/I77793887"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},{"id":"pmh:oai:doaj.org/article:03ba0e1124514bfbb6a8d7a4e1e70e3e","is_oa":true,"landing_page_url":"https://doaj.org/article/03ba0e1124514bfbb6a8d7a4e1e70e3e","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Future Internet, Vol 17, Iss 4, p 135 (2025)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.3390/fi17040135","is_oa":true,"landing_page_url":"https://doi.org/10.3390/fi17040135","pdf_url":"https://www.mdpi.com/1999-5903/17/4/135/pdf?version=1742631057","source":{"id":"https://openalex.org/S34838331","display_name":"Future Internet","issn_l":"1999-5903","issn":["1999-5903"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Future Internet","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G5477111271","display_name":null,"funder_award_id":"2148878","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4408770958.pdf"},"referenced_works_count":57,"referenced_works":["https://openalex.org/W1539187029","https://openalex.org/W1608789752","https://openalex.org/W1936155969","https://openalex.org/W1966900042","https://openalex.org/W1967981232","https://openalex.org/W1983578042","https://openalex.org/W2029886770","https://openalex.org/W2031253971","https://openalex.org/W2059975159","https://openalex.org/W2066792529","https://openalex.org/W2076372024","https://openalex.org/W2078396547","https://openalex.org/W2087739686","https://openalex.org/W2103148772","https://openalex.org/W2103759455","https://openalex.org/W2117200377","https://openalex.org/W2120779048","https://openalex.org/W2125980212","https://openalex.org/W2129034170","https://openalex.org/W2131744502","https://openalex.org/W2134273450","https://openalex.org/W2145009222","https://openalex.org/W2153222576","https://openalex.org/W2188347037","https://openalex.org/W2285587677","https://openalex.org/W2295512169","https://openalex.org/W2438541405","https://openalex.org/W2507396586","https://openalex.org/W2545734556","https://openalex.org/W2595801002","https://openalex.org/W2796541946","https://openalex.org/W2937423263","https://openalex.org/W2996284762","https://openalex.org/W3012106948","https://openalex.org/W3105625590","https://openalex.org/W3105952014","https://openalex.org/W3128365223","https://openalex.org/W3136111930","https://openalex.org/W3156333129","https://openalex.org/W3174454925","https://openalex.org/W3196202912","https://openalex.org/W3197102031","https://openalex.org/W3213213154","https://openalex.org/W4212926655","https://openalex.org/W4294622174","https://openalex.org/W4296232960","https://openalex.org/W4384197836","https://openalex.org/W4395098141","https://openalex.org/W4399465020","https://openalex.org/W6632450909","https://openalex.org/W6657870505","https://openalex.org/W6669817246","https://openalex.org/W6687181900","https://openalex.org/W6718198998","https://openalex.org/W6734880834","https://openalex.org/W6750517309","https://openalex.org/W6791431630"],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W2898732673","https://openalex.org/W2410053581","https://openalex.org/W2383658677","https://openalex.org/W3123203398","https://openalex.org/W1972473893","https://openalex.org/W2466435674","https://openalex.org/W2765200542","https://openalex.org/W2367893528","https://openalex.org/W3107784576"],"abstract_inverted_index":{"With":[0],"the":[1,55,62,66,69,105,111,116,128,134,143,196],"availability":[2],"of":[3,8,16,104,145],"a":[4,108,177],"virtually":[5],"infinite":[6],"number":[7],"text":[9,161],"documents":[10,58,90],"in":[11,73,121,133],"digital":[12],"format,":[13],"automatic":[14],"comparison":[15],"textual":[17,48],"data":[18],"is":[19,99,113,125,198],"essential":[20],"for":[21,180,195],"extracting":[22],"meaningful":[23],"insights":[24,46,165],"that":[25,154,166],"are":[26,118,130,167],"difficult":[27,169],"to":[28,41,88,101,170],"identify":[29,171],"manually.":[30,172],"Many":[31],"existing":[32],"tools,":[33],"including":[34,186],"AI":[35],"and":[36,44,92,192],"large":[37],"language":[38],"models,":[39],"struggle":[40],"provide":[42],"precise":[43],"explainable":[45,94,164],"into":[47],"similarities.":[49,95],"In":[50],"many":[51],"cases,":[52],"they":[53],"determine":[54],"similarity":[56],"between":[57,68,160],"as":[59],"reflected":[60],"by":[61,81],"text,":[63],"rather":[64],"than":[65],"similarities":[67,159,182],"subjects":[70],"being":[71],"discussed":[72],"these":[74,79,146],"documents.":[75],"This":[76,173],"study":[77],"addresses":[78],"limitations":[80],"developing":[82],"an":[83],"n-gram":[84],"analysis":[85],"framework":[86,156],"designed":[87],"compare":[89],"automatically":[91],"uncover":[93],"A":[96],"scoring":[97],"formula":[98],"applied":[100],"assigns":[102],"each":[103],"n-grams":[106,117,129],"with":[107],"weight,":[109],"where":[110],"weight":[112],"higher":[114],"when":[115,127],"more":[119,131],"frequent":[120,132],"both":[122],"documents,":[123,162],"but":[124],"penalized":[126],"English":[135],"language.":[136],"Visualization":[137],"tools":[138],"like":[139],"word":[140],"clouds":[141],"enhance":[142],"representation":[144],"patterns,":[147],"providing":[148],"clearer":[149],"insights.":[150],"The":[151],"findings":[152],"demonstrate":[153],"this":[155],"effectively":[157],"uncovers":[158],"offering":[163],"often":[168],"non-parametric":[174],"approach":[175],"provides":[176],"deterministic":[178],"solution":[179],"identifying":[181],"across":[183],"various":[184],"fields,":[185],"biographies,":[187],"scientific":[188],"literature,":[189],"historical":[190],"texts,":[191],"more.":[193],"Code":[194],"method":[197],"publicly":[199],"available.":[200]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
