{"id":"https://openalex.org/W4387019089","doi":"https://doi.org/10.3390/bdcc7040158","title":"Ensemble-Based Short Text Similarity: An Easy Approach for Multilingual Datasets Using Transformers and WordNet in Real-World Scenarios","display_name":"Ensemble-Based Short Text Similarity: An Easy Approach for Multilingual Datasets Using Transformers and WordNet in Real-World Scenarios","publication_year":2023,"publication_date":"2023-09-25","ids":{"openalex":"https://openalex.org/W4387019089","doi":"https://doi.org/10.3390/bdcc7040158"},"language":"en","primary_location":{"id":"doi:10.3390/bdcc7040158","is_oa":true,"landing_page_url":"https://doi.org/10.3390/bdcc7040158","pdf_url":"https://www.mdpi.com/2504-2289/7/4/158/pdf?version=1695631519","source":{"id":"https://openalex.org/S4210238752","display_name":"Big Data and Cognitive Computing","issn_l":"2504-2289","issn":["2504-2289"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Big Data and Cognitive Computing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/2504-2289/7/4/158/pdf?version=1695631519","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5086317084","display_name":"Isabella Gagliardi","orcid":"https://orcid.org/0000-0002-4667-919X"},"institutions":[{"id":"https://openalex.org/I4210137033","display_name":"Istituto di Matematica Applicata e Tecnologie Informatiche","ror":"https://ror.org/03m0n3c07","country_code":"IT","type":"education","lineage":["https://openalex.org/I4210137033","https://openalex.org/I4210155236"]},{"id":"https://openalex.org/I4210155236","display_name":"National Research Council","ror":"https://ror.org/04zaypm56","country_code":"IT","type":"nonprofit","lineage":["https://openalex.org/I4210155236"]}],"countries":["IT"],"is_corresponding":true,"raw_author_name":"Isabella Gagliardi","raw_affiliation_strings":["Institute for Applied Mathematics and Information Technologies, National Research Council of Italy (IMATI\u2014CNR), 20133 Milan, Italy"],"raw_orcid":"https://orcid.org/0000-0002-4667-919X","affiliations":[{"raw_affiliation_string":"Institute for Applied Mathematics and Information Technologies, National Research Council of Italy (IMATI\u2014CNR), 20133 Milan, Italy","institution_ids":["https://openalex.org/I4210155236","https://openalex.org/I4210137033"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5031623078","display_name":"Maria Teresa Artese","orcid":null},"institutions":[{"id":"https://openalex.org/I4210137033","display_name":"Istituto di Matematica Applicata e Tecnologie Informatiche","ror":"https://ror.org/03m0n3c07","country_code":"IT","type":"education","lineage":["https://openalex.org/I4210137033","https://openalex.org/I4210155236"]},{"id":"https://openalex.org/I4210155236","display_name":"National Research Council","ror":"https://ror.org/04zaypm56","country_code":"IT","type":"nonprofit","lineage":["https://openalex.org/I4210155236"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Maria Teresa Artese","raw_affiliation_strings":["Institute for Applied Mathematics and Information Technologies, National Research Council of Italy (IMATI\u2014CNR), 20133 Milan, Italy"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute for Applied Mathematics and Information Technologies, National Research Council of Italy (IMATI\u2014CNR), 20133 Milan, Italy","institution_ids":["https://openalex.org/I4210155236","https://openalex.org/I4210137033"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5086317084"],"corresponding_institution_ids":["https://openalex.org/I4210137033","https://openalex.org/I4210155236"],"apc_list":{"value":1400,"currency":"CHF","value_usd":1515},"apc_paid":{"value":1400,"currency":"CHF","value_usd":1515},"fwci":1.1597,"has_fulltext":true,"cited_by_count":7,"citation_normalized_percentile":{"value":0.82956859,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":"7","issue":"4","first_page":"158","last_page":"158"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8356345891952515},{"id":"https://openalex.org/keywords/wordnet","display_name":"WordNet","score":0.7247350215911865},{"id":"https://openalex.org/keywords/granularity","display_name":"Granularity","score":0.6073827743530273},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5750070810317993},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.5434456467628479},{"id":"https://openalex.org/keywords/personalization","display_name":"Personalization","score":0.5360797643661499},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.49300333857536316},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4862166941165924},{"id":"https://openalex.org/keywords/cultural-heritage","display_name":"Cultural heritage","score":0.4382035732269287},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3904590606689453},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3710182309150696},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3631390333175659},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.22879734635353088},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.21186235547065735}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8356345891952515},{"id":"https://openalex.org/C157659113","wikidata":"https://www.wikidata.org/wiki/Q533822","display_name":"WordNet","level":2,"score":0.7247350215911865},{"id":"https://openalex.org/C177774035","wikidata":"https://www.wikidata.org/wiki/Q1246948","display_name":"Granularity","level":2,"score":0.6073827743530273},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5750070810317993},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.5434456467628479},{"id":"https://openalex.org/C183003079","wikidata":"https://www.wikidata.org/wiki/Q1000371","display_name":"Personalization","level":2,"score":0.5360797643661499},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.49300333857536316},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4862166941165924},{"id":"https://openalex.org/C60671577","wikidata":"https://www.wikidata.org/wiki/Q210272","display_name":"Cultural heritage","level":2,"score":0.4382035732269287},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3904590606689453},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3710182309150696},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3631390333175659},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.22879734635353088},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.21186235547065735},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.0},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.3390/bdcc7040158","is_oa":true,"landing_page_url":"https://doi.org/10.3390/bdcc7040158","pdf_url":"https://www.mdpi.com/2504-2289/7/4/158/pdf?version=1695631519","source":{"id":"https://openalex.org/S4210238752","display_name":"Big Data and Cognitive Computing","issn_l":"2504-2289","issn":["2504-2289"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Big Data and Cognitive Computing","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:4580829b012b4f09b3f564643aa390a4","is_oa":true,"landing_page_url":"https://doaj.org/article/4580829b012b4f09b3f564643aa390a4","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Big Data and Cognitive Computing, Vol 7, Iss 4, p 158 (2023)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.3390/bdcc7040158","is_oa":true,"landing_page_url":"https://doi.org/10.3390/bdcc7040158","pdf_url":"https://www.mdpi.com/2504-2289/7/4/158/pdf?version=1695631519","source":{"id":"https://openalex.org/S4210238752","display_name":"Big Data and Cognitive Computing","issn_l":"2504-2289","issn":["2504-2289"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Big Data and Cognitive Computing","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.6299999952316284,"id":"https://metadata.un.org/sdg/11","display_name":"Sustainable cities and communities"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4387019089.pdf"},"referenced_works_count":33,"referenced_works":["https://openalex.org/W1854884267","https://openalex.org/W2028742638","https://openalex.org/W2067438047","https://openalex.org/W2081580037","https://openalex.org/W2171313960","https://openalex.org/W2188347037","https://openalex.org/W2461338233","https://openalex.org/W2529667321","https://openalex.org/W2953411641","https://openalex.org/W2979826702","https://openalex.org/W2989868186","https://openalex.org/W3021792233","https://openalex.org/W3024449834","https://openalex.org/W3081886688","https://openalex.org/W3090556797","https://openalex.org/W3100299802","https://openalex.org/W3126216173","https://openalex.org/W3130880317","https://openalex.org/W3156228691","https://openalex.org/W3169554260","https://openalex.org/W3170333377","https://openalex.org/W4200604551","https://openalex.org/W4213009331","https://openalex.org/W4224311194","https://openalex.org/W4239973675","https://openalex.org/W4280647260","https://openalex.org/W4295857769","https://openalex.org/W4310568840","https://openalex.org/W4362591703","https://openalex.org/W6687181900","https://openalex.org/W6728658216","https://openalex.org/W6785227713","https://openalex.org/W6796215775"],"related_works":["https://openalex.org/W2900382651","https://openalex.org/W1981879262","https://openalex.org/W2363417484","https://openalex.org/W4225863708","https://openalex.org/W1480103567","https://openalex.org/W1599970036","https://openalex.org/W1849827364","https://openalex.org/W2786299737","https://openalex.org/W115324854","https://openalex.org/W4211187494"],"abstract_inverted_index":{"When":[0],"integrating":[1],"data":[2],"from":[3,43],"different":[4,11,16,44,93,106,119],"sources,":[5],"there":[6],"are":[7],"problems":[8],"of":[9,15,31,40,56,64,67],"synonymy,":[10],"languages,":[12],"and":[13,46,51,79,117,123],"concepts":[14],"granularity.":[17],"This":[18],"paper":[19,102],"proposes":[20],"a":[21,89,142],"simple":[22],"yet":[23],"effective":[24],"approach":[25,58,134],"to":[26,87,104,108,127,131,149],"evaluate":[27,116,150],"the":[28,57,65,133,139,151],"semantic":[29],"similarity":[30],"short":[32],"texts,":[33],"especially":[34],"keywords.":[35],"The":[36,84,101],"method":[37],"is":[38],"capable":[39],"matching":[41],"keywords":[42],"sources":[45],"languages":[47,78],"by":[48],"exploiting":[49],"transformers":[50],"WordNet-based":[52],"methods.":[53],"Key":[54],"features":[55],"include":[59],"its":[60],"unsupervised":[61],"pipeline,":[62],"mitigation":[63],"lack":[66],"context":[68],"in":[69,111],"keywords,":[70],"scalability":[71],"for":[72,76,92,145],"large":[73],"archives,":[74,148],"support":[75],"multiple":[77],"real-world":[80],"scenarios":[81],"adaptation":[82],"capabilities.":[83],"work":[85],"aims":[86,103],"provide":[88],"versatile":[90],"tool":[91],"cultural":[94,146],"heritage":[95,147],"archives":[96],"without":[97],"requiring":[98],"complex":[99],"customization.":[100],"explore":[105],"approaches":[107],"identifying":[109],"similarities":[110],"1-":[112],"or":[113],"n-gram":[114],"tags,":[115],"compare":[118],"pre-trained":[120],"language":[121],"models,":[122],"define":[124],"integrated":[125],"methods":[126],"overcome":[128],"limitations.":[129],"Tests":[130],"validate":[132],"have":[135],"been":[136],"conducted":[137],"using":[138],"QueryLab":[140],"portal,":[141],"search":[143],"engine":[144],"proposed":[152],"pipeline.":[153]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":2}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
