{"id":"https://openalex.org/W4387060314","doi":"https://doi.org/10.3390/informatics10040076","title":"Analyzing Indo-European Language Similarities Using Document Vectors","display_name":"Analyzing Indo-European Language Similarities Using Document Vectors","publication_year":2023,"publication_date":"2023-09-26","ids":{"openalex":"https://openalex.org/W4387060314","doi":"https://doi.org/10.3390/informatics10040076"},"language":"en","primary_location":{"id":"doi:10.3390/informatics10040076","is_oa":true,"landing_page_url":"https://doi.org/10.3390/informatics10040076","pdf_url":"https://www.mdpi.com/2227-9709/10/4/76/pdf?version=1695730979","source":{"id":"https://openalex.org/S2738238905","display_name":"Informatics","issn_l":"2227-9709","issn":["2227-9709"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Informatics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/2227-9709/10/4/76/pdf?version=1695730979","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5109641386","display_name":"Samuel R. Schrader","orcid":null},"institutions":[{"id":"https://openalex.org/I87547150","display_name":"Southern Illinois University Edwardsville","ror":"https://ror.org/04cqs5j56","country_code":"US","type":"education","lineage":["https://openalex.org/I2801502357","https://openalex.org/I87547150"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Samuel R. Schrader","raw_affiliation_strings":["Department of Computer Science, Southern Illinois University Edwardsville, Edwardsville, IL 62026, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Southern Illinois University Edwardsville, Edwardsville, IL 62026, USA","institution_ids":["https://openalex.org/I87547150"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5078722503","display_name":"Eren Gultepe","orcid":null},"institutions":[{"id":"https://openalex.org/I87547150","display_name":"Southern Illinois University Edwardsville","ror":"https://ror.org/04cqs5j56","country_code":"US","type":"education","lineage":["https://openalex.org/I2801502357","https://openalex.org/I87547150"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Eren Gultepe","raw_affiliation_strings":["Department of Computer Science, Southern Illinois University Edwardsville, Edwardsville, IL 62026, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Southern Illinois University Edwardsville, Edwardsville, IL 62026, USA","institution_ids":["https://openalex.org/I87547150"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5078722503"],"corresponding_institution_ids":["https://openalex.org/I87547150"],"apc_list":{"value":1600,"currency":"CHF","value_usd":1732},"apc_paid":{"value":1600,"currency":"CHF","value_usd":1732},"fwci":0.5147,"has_fulltext":true,"cited_by_count":3,"citation_normalized_percentile":{"value":0.71990008,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":97},"biblio":{"volume":"10","issue":"4","first_page":"76","last_page":"76"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.9950000047683716,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9760000109672546,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6700546145439148},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6338182687759399},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.6299037933349609},{"id":"https://openalex.org/keywords/hierarchical-clustering","display_name":"Hierarchical clustering","score":0.596078634262085},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5864613056182861},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.5560808181762695},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4826410412788391},{"id":"https://openalex.org/keywords/language-family","display_name":"Language family","score":0.4776674211025238},{"id":"https://openalex.org/keywords/tree","display_name":"Tree (set theory)","score":0.4769655466079712},{"id":"https://openalex.org/keywords/slavic-languages","display_name":"Slavic languages","score":0.41971927881240845},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.39695337414741516},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.21058455109596252},{"id":"https://openalex.org/keywords/combinatorics","display_name":"Combinatorics","score":0.0836024284362793}],"concepts":[{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6700546145439148},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6338182687759399},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.6299037933349609},{"id":"https://openalex.org/C92835128","wikidata":"https://www.wikidata.org/wiki/Q1277447","display_name":"Hierarchical clustering","level":3,"score":0.596078634262085},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5864613056182861},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.5560808181762695},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4826410412788391},{"id":"https://openalex.org/C2780566098","wikidata":"https://www.wikidata.org/wiki/Q25295","display_name":"Language family","level":2,"score":0.4776674211025238},{"id":"https://openalex.org/C113174947","wikidata":"https://www.wikidata.org/wiki/Q2859736","display_name":"Tree (set theory)","level":2,"score":0.4769655466079712},{"id":"https://openalex.org/C121894898","wikidata":"https://www.wikidata.org/wiki/Q23526","display_name":"Slavic languages","level":2,"score":0.41971927881240845},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.39695337414741516},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.21058455109596252},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0836024284362793},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.3390/informatics10040076","is_oa":true,"landing_page_url":"https://doi.org/10.3390/informatics10040076","pdf_url":"https://www.mdpi.com/2227-9709/10/4/76/pdf?version=1695730979","source":{"id":"https://openalex.org/S2738238905","display_name":"Informatics","issn_l":"2227-9709","issn":["2227-9709"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Informatics","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:509a7c634b2041c595bf3a667bbf0dde","is_oa":true,"landing_page_url":"https://doaj.org/article/509a7c634b2041c595bf3a667bbf0dde","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Informatics, Vol 10, Iss 4, p 76 (2023)","raw_type":"article"},{"id":"pmh:oai:mdpi.com:/2227-9709/10/4/76/","is_oa":true,"landing_page_url":"https://dx.doi.org/10.3390/informatics10040076","pdf_url":null,"source":{"id":"https://openalex.org/S4306400947","display_name":"MDPI (MDPI AG)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210097602","host_organization_name":"Multidisciplinary Digital Publishing Institute (Switzerland)","host_organization_lineage":["https://openalex.org/I4210097602"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Informatics","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.3390/informatics10040076","is_oa":true,"landing_page_url":"https://doi.org/10.3390/informatics10040076","pdf_url":"https://www.mdpi.com/2227-9709/10/4/76/pdf?version=1695730979","source":{"id":"https://openalex.org/S2738238905","display_name":"Informatics","issn_l":"2227-9709","issn":["2227-9709"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Informatics","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.800000011920929,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4387060314.pdf"},"referenced_works_count":34,"referenced_works":["https://openalex.org/W22168010","https://openalex.org/W1867732144","https://openalex.org/W1979618484","https://openalex.org/W1987836480","https://openalex.org/W2009506985","https://openalex.org/W2016381774","https://openalex.org/W2060425093","https://openalex.org/W2095293504","https://openalex.org/W2117801354","https://openalex.org/W2131681506","https://openalex.org/W2131744502","https://openalex.org/W2148374900","https://openalex.org/W2171990733","https://openalex.org/W2222512263","https://openalex.org/W2252042690","https://openalex.org/W2566957588","https://openalex.org/W2610003462","https://openalex.org/W2740656274","https://openalex.org/W2777073510","https://openalex.org/W2886643713","https://openalex.org/W2963826397","https://openalex.org/W2991388718","https://openalex.org/W3017290615","https://openalex.org/W3099768174","https://openalex.org/W3105864225","https://openalex.org/W3212926742","https://openalex.org/W4246728774","https://openalex.org/W4294170691","https://openalex.org/W6639077426","https://openalex.org/W6649753774","https://openalex.org/W6682691769","https://openalex.org/W6685380521","https://openalex.org/W6698446958","https://openalex.org/W6777517041"],"related_works":["https://openalex.org/W2089491222","https://openalex.org/W2474786147","https://openalex.org/W3216283554","https://openalex.org/W2059878710","https://openalex.org/W3113209272","https://openalex.org/W2913258845","https://openalex.org/W4206171893","https://openalex.org/W4243133228","https://openalex.org/W4304204731","https://openalex.org/W2790719030"],"abstract_inverted_index":{"The":[0,34,87,153],"evaluation":[1],"of":[2,12,31,42,49,52,72,75,113,183],"similarities":[3],"between":[4,95,147],"natural":[5],"languages":[6,14,27],"often":[7],"relies":[8],"on":[9,46,138],"prior":[10],"knowledge":[11],"the":[13,29,53,139,148],"being":[15],"studied.":[16],"We":[17,116],"describe":[18],"three":[19,132],"methods":[20,38,88,119],"for":[21],"building":[22],"phylogenetic":[23],"trees":[24],"and":[25,66,107,124,141,150,166,179,186,193],"clustering":[26,92,101,134],"without":[28],"use":[30],"language-specific":[32],"information.":[33],"input":[35],"to":[36,177],"our":[37,118],"is":[39,146],"a":[40,47,73,103,121],"set":[41,74],"document":[43,172],"vectors":[44,173],"trained":[45],"corpus":[48,70],"parallel":[50],"translations":[51],"Bible":[54,77],"into":[55,84],"22":[56],"Indo-European":[57],"languages,":[58],"representing":[59],"4":[60],"language":[61,96,114,125,191],"families:":[62],"Indo-Iranian,":[63],"Slavic,":[64],"Germanic,":[65],"Romance.":[67],"This":[68,169],"text":[69],"consists":[71],"532,092":[76],"verses,":[78],"with":[79],"24,186":[80],"identical":[81],"verses":[82],"translated":[83],"each":[85],"language.":[86],"are":[89,159],"(A)":[90],"hierarchical":[91,100],"using":[93,102,120],"distance":[94,105],"vector":[97],"centroids,":[98],"(B)":[99],"network-derived":[104],"measure,":[106],"(C)":[108],"Deep":[109],"Embedded":[110],"Clustering":[111],"(DEC)":[112],"vectors.":[115],"evaluate":[117],"ground-truth":[122],"tree":[123],"families":[126,158],"derived":[127],"from":[128],"said":[129],"tree.":[130],"All":[131],"achieve":[133],"F-scores":[135,155],"above":[136],"0.9":[137],"Indo-Iranian":[140],"Slavic":[142],"families;":[143],"most":[144],"confusion":[145],"Germanic":[149],"Romance":[151],"families.":[152],"mean":[154],"across":[156],"all":[157],"0.864":[160],"(centroid":[161],"clustering),":[162],"0.953":[163],"(network":[164],"partitioning),":[165],"0.763":[167],"(DEC).":[168],"shows":[170],"that":[171],"can":[174],"be":[175],"used":[176],"capture":[178],"compare":[180],"linguistic":[181],"features":[182],"multilingual":[184],"texts,":[185],"thus":[187],"could":[188],"help":[189],"extend":[190],"similarity":[192],"other":[194],"translation":[195],"studies":[196],"research.":[197]},"counts_by_year":[{"year":2025,"cited_by_count":3}],"updated_date":"2026-04-18T07:56:08.524223","created_date":"2025-10-10T00:00:00"}
