{"id":"https://openalex.org/W7092289972","doi":"https://doi.org/10.1109/access.2025.3622752","title":"Lightweight Semantic Similarity Search of Data Products","display_name":"Lightweight Semantic Similarity Search of Data Products","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W7092289972","doi":"https://doi.org/10.1109/access.2025.3622752"},"language":"en","primary_location":{"id":"doi:10.1109/access.2025.3622752","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2025.3622752","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1109/access.2025.3622752","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Antonio D\u2019Ambrosio","orcid":null},"institutions":[{"id":"https://openalex.org/I4210131864","display_name":"Gilead Sciences (Italy)","ror":"https://ror.org/03hxqcn24","country_code":"IT","type":"company","lineage":["https://openalex.org/I4210131864","https://openalex.org/I4210140816"]}],"countries":["IT"],"is_corresponding":true,"raw_author_name":"Antonio D\u2019Ambrosio","raw_affiliation_strings":["AgileLab S.r.l., Milan, Italy"],"affiliations":[{"raw_affiliation_string":"AgileLab S.r.l., Milan, Italy","institution_ids":["https://openalex.org/I4210131864"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Paolo Platter","orcid":null},"institutions":[{"id":"https://openalex.org/I4210131864","display_name":"Gilead Sciences (Italy)","ror":"https://ror.org/03hxqcn24","country_code":"IT","type":"company","lineage":["https://openalex.org/I4210131864","https://openalex.org/I4210140816"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Paolo Platter","raw_affiliation_strings":["AgileLab S.r.l., Milan, Italy"],"affiliations":[{"raw_affiliation_string":"AgileLab S.r.l., Milan, Italy","institution_ids":["https://openalex.org/I4210131864"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Martina Salis","orcid":"https://orcid.org/0009-0007-4677-0739"},"institutions":[{"id":"https://openalex.org/I172446870","display_name":"University of Cagliari","ror":"https://ror.org/003109y17","country_code":"IT","type":"education","lineage":["https://openalex.org/I172446870"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Martina Salis","raw_affiliation_strings":["Department of Mathematics and Computer Science, University of Cagliari, Cagliari, Italy"],"affiliations":[{"raw_affiliation_string":"Department of Mathematics and Computer Science, University of Cagliari, Cagliari, Italy","institution_ids":["https://openalex.org/I172446870"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Francesco Simbola","orcid":"https://orcid.org/0009-0006-6470-6223"},"institutions":[{"id":"https://openalex.org/I172446870","display_name":"University of Cagliari","ror":"https://ror.org/003109y17","country_code":"IT","type":"education","lineage":["https://openalex.org/I172446870"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Francesco Simbola","raw_affiliation_strings":["Department of Mathematics and Computer Science, University of Cagliari, Cagliari, Italy"],"affiliations":[{"raw_affiliation_string":"Department of Mathematics and Computer Science, University of Cagliari, Cagliari, Italy","institution_ids":["https://openalex.org/I172446870"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Diego Reforgiato Recupero","orcid":"https://orcid.org/0000-0001-8646-6183"},"institutions":[{"id":"https://openalex.org/I172446870","display_name":"University of Cagliari","ror":"https://ror.org/003109y17","country_code":"IT","type":"education","lineage":["https://openalex.org/I172446870"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Diego Reforgiato Recupero","raw_affiliation_strings":["Department of Mathematics and Computer Science, University of Cagliari, Cagliari, Italy"],"affiliations":[{"raw_affiliation_string":"Department of Mathematics and Computer Science, University of Cagliari, Cagliari, Italy","institution_ids":["https://openalex.org/I172446870"]}]},{"author_position":"last","author":{"id":null,"display_name":"Daniele Riboni","orcid":"https://orcid.org/0000-0002-0695-2040"},"institutions":[{"id":"https://openalex.org/I172446870","display_name":"University of Cagliari","ror":"https://ror.org/003109y17","country_code":"IT","type":"education","lineage":["https://openalex.org/I172446870"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Daniele Riboni","raw_affiliation_strings":["Department of Mathematics and Computer Science, University of Cagliari, Cagliari, Italy"],"affiliations":[{"raw_affiliation_string":"Department of Mathematics and Computer Science, University of Cagliari, Cagliari, Italy","institution_ids":["https://openalex.org/I172446870"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I4210131864"],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.60001485,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"13","issue":null,"first_page":"180647","last_page":"180661"},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9272000193595886,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9272000193595886,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.008899999782443047,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.005799999926239252,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.6316999793052673},{"id":"https://openalex.org/keywords/relational-database","display_name":"Relational database","score":0.5256999731063843},{"id":"https://openalex.org/keywords/semantic-similarity","display_name":"Semantic similarity","score":0.46650001406669617},{"id":"https://openalex.org/keywords/data-modeling","display_name":"Data modeling","score":0.4514000117778778},{"id":"https://openalex.org/keywords/metadata-modeling","display_name":"Metadata modeling","score":0.41769999265670776},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.4163999855518341},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.39500001072883606},{"id":"https://openalex.org/keywords/data-set","display_name":"Data set","score":0.39419999718666077},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.39089998602867126},{"id":"https://openalex.org/keywords/data-deduplication","display_name":"Data deduplication","score":0.37130001187324524}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8569999933242798},{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.6316999793052673},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5838000178337097},{"id":"https://openalex.org/C5655090","wikidata":"https://www.wikidata.org/wiki/Q192588","display_name":"Relational database","level":2,"score":0.5256999731063843},{"id":"https://openalex.org/C130318100","wikidata":"https://www.wikidata.org/wiki/Q2268914","display_name":"Semantic similarity","level":2,"score":0.46650001406669617},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.46140000224113464},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.4514000117778778},{"id":"https://openalex.org/C110326360","wikidata":"https://www.wikidata.org/wiki/Q17149476","display_name":"Metadata modeling","level":4,"score":0.41769999265670776},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.4163999855518341},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.39500001072883606},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.39419999718666077},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.39089998602867126},{"id":"https://openalex.org/C32587265","wikidata":"https://www.wikidata.org/wiki/Q1182260","display_name":"Data deduplication","level":2,"score":0.37130001187324524},{"id":"https://openalex.org/C196621874","wikidata":"https://www.wikidata.org/wiki/Q385210","display_name":"IDEF1X","level":4,"score":0.36079999804496765},{"id":"https://openalex.org/C90312973","wikidata":"https://www.wikidata.org/wiki/Q7449052","display_name":"Semantic data model","level":2,"score":0.34779998660087585},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.336899995803833},{"id":"https://openalex.org/C72634772","wikidata":"https://www.wikidata.org/wiki/Q386824","display_name":"Data integration","level":2,"score":0.33399999141693115},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.31869998574256897},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.31380000710487366},{"id":"https://openalex.org/C5968703","wikidata":"https://www.wikidata.org/wiki/Q267136","display_name":"Database model","level":3,"score":0.3037000000476837},{"id":"https://openalex.org/C2779489174","wikidata":"https://www.wikidata.org/wiki/Q6822246","display_name":"Metadata management","level":3,"score":0.3005000054836273},{"id":"https://openalex.org/C33762810","wikidata":"https://www.wikidata.org/wiki/Q461671","display_name":"Data integrity","level":2,"score":0.29980000853538513},{"id":"https://openalex.org/C203702819","wikidata":"https://www.wikidata.org/wiki/Q17146953","display_name":"Logical data model","level":3,"score":0.2939000129699707},{"id":"https://openalex.org/C103692084","wikidata":"https://www.wikidata.org/wiki/Q1765824","display_name":"Semantic grid","level":3,"score":0.29109999537467957},{"id":"https://openalex.org/C2778864079","wikidata":"https://www.wikidata.org/wiki/Q173285","display_name":"Digital data","level":3,"score":0.29030001163482666},{"id":"https://openalex.org/C1668388","wikidata":"https://www.wikidata.org/wiki/Q1149776","display_name":"Data management","level":2,"score":0.28940001130104065},{"id":"https://openalex.org/C90673727","wikidata":"https://www.wikidata.org/wiki/Q901718","display_name":"Product (mathematics)","level":2,"score":0.28700000047683716},{"id":"https://openalex.org/C30872290","wikidata":"https://www.wikidata.org/wiki/Q1172389","display_name":"Data element","level":3,"score":0.2865000069141388},{"id":"https://openalex.org/C137314826","wikidata":"https://www.wikidata.org/wiki/Q2330408","display_name":"Data mapping","level":2,"score":0.2833000123500824},{"id":"https://openalex.org/C153048206","wikidata":"https://www.wikidata.org/wiki/Q3454922","display_name":"Metadata repository","level":3,"score":0.28220000863075256},{"id":"https://openalex.org/C40207289","wikidata":"https://www.wikidata.org/wiki/Q755662","display_name":"Relational model","level":3,"score":0.27959999442100525},{"id":"https://openalex.org/C100463513","wikidata":"https://www.wikidata.org/wiki/Q5227322","display_name":"Data model (GIS)","level":2,"score":0.2768999934196472},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.2689000070095062},{"id":"https://openalex.org/C54239708","wikidata":"https://www.wikidata.org/wiki/Q1329910","display_name":"View","level":3,"score":0.267300009727478},{"id":"https://openalex.org/C2780586970","wikidata":"https://www.wikidata.org/wiki/Q1357284","display_name":"Popularity","level":2,"score":0.2644999921321869},{"id":"https://openalex.org/C2778816267","wikidata":"https://www.wikidata.org/wiki/Q21015578","display_name":"Semantic query","level":4,"score":0.25920000672340393},{"id":"https://openalex.org/C551230270","wikidata":"https://www.wikidata.org/wiki/Q4368942","display_name":"Data retrieval","level":2,"score":0.25099998712539673}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/access.2025.3622752","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2025.3622752","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:fcd3204b7afe409d834cc193f71a824e","is_oa":true,"landing_page_url":"https://doaj.org/article/fcd3204b7afe409d834cc193f71a824e","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Access, Vol 13, Pp 180647-180661 (2025)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1109/access.2025.3622752","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2025.3622752","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.4822736382484436,"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0],"increasing":[1],"popularity":[2],"of":[3,14,26,70,122,166,172,190],"digital":[4],"services":[5],"provided":[6],"by":[7],"large":[8],"enterprises":[9],"has":[10],"determined":[11],"a":[12,31,88,164,176],"proliferation":[13],"data":[15,19,28,35,97,148,167],"products,":[16,168],"determining":[17],"several":[18],"management":[20],"issues.":[21],"Among":[22],"them,":[23],"the":[24,68,105,118,170,180,186],"recognition":[25],"overlapping":[27],"products":[29,149],"is":[30,125,141],"major":[32],"challenge,":[33],"since":[34],"product":[36,98],"duplication":[37],"can":[38],"lead":[39],"to":[40,53,92,110,128,144],"inefficiencies,":[41],"unnecessary":[42],"overhead,":[43],"and":[44,104,124,179,188],"customer":[45],"confusion.":[46],"Previous":[47],"works":[48,66],"have":[49,151],"proposed":[50],"different":[51],"methods":[52],"search":[54,78],"for":[55,75],"joinability":[56],"or":[57,146],"semantic":[58,76],"similarity":[59,77,173],"in":[60,79,96],"relational":[61],"database":[62],"systems.":[63],"Other":[64],"recent":[65],"explored":[67],"use":[69],"Large":[71],"Language":[72],"Models":[73],"(LLMs)":[74],"Big":[80],"Data":[81],"repositories.":[82],"In":[83],"this":[84],"work,":[85],"we":[86],"propose":[87],"novel,":[89],"lightweight":[90],"technique":[91],"find":[93],"possible":[94],"duplications":[95],"catalogs,":[99],"exploiting":[100],"pre-trained":[101],"transformer":[102],"models":[103],"Hungarian":[106],"algorithm.":[107],"With":[108],"respect":[109],"previous":[111],"works,":[112],"our":[113,139,191],"method":[114],"does":[115],"not":[116,152],"incur":[117],"high":[119],"computational":[120,181],"overhead":[121],"LLMs,":[123],"flexible":[126],"enough":[127],"also":[129,143],"detect":[130],"partial":[131],"duplication.":[132],"Moreover,":[133],"being":[134],"based":[135],"on":[136],"metadata":[137],"analysis,":[138],"system":[140],"applicable":[142],"prospective":[145],"new":[147],"that":[150],"yet":[153],"been":[154],"instantiated":[155],"with":[156,163],"data.":[157],"We":[158],"conducted":[159],"an":[160],"experimental":[161],"evaluation":[162],"set":[165],"measuring":[169],"accuracy":[171],"detection":[174],"against":[175],"gold":[177],"standard":[178],"cost.":[182],"Experimental":[183],"results":[184],"support":[185],"effectiveness":[187],"efficiency":[189],"solution.":[192]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-18T00:00:00"}
