{"id":"https://openalex.org/W4406458473","doi":"https://doi.org/10.1109/bigdata62323.2024.10825708","title":"Optimizing Semantic Joinability in Heterogeneous Data: A Triplet-Based Approach with Pre-trained Deep Learning Models","display_name":"Optimizing Semantic Joinability in Heterogeneous Data: A Triplet-Based Approach with Pre-trained Deep Learning Models","publication_year":2024,"publication_date":"2024-12-15","ids":{"openalex":"https://openalex.org/W4406458473","doi":"https://doi.org/10.1109/bigdata62323.2024.10825708"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata62323.2024.10825708","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata62323.2024.10825708","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Big Data (BigData)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5014881766","display_name":"M. Pedersen","orcid":null},"institutions":[{"id":"https://openalex.org/I96673099","display_name":"Technical University of Denmark","ror":"https://ror.org/04qtj9h94","country_code":"DK","type":"education","lineage":["https://openalex.org/I96673099"]}],"countries":["DK"],"is_corresponding":true,"raw_author_name":"Magnus Guldberg Pedersen","raw_affiliation_strings":["Technical University of Denmark,DTU Compute,Kongens Lyngby,Denmark"],"affiliations":[{"raw_affiliation_string":"Technical University of Denmark,DTU Compute,Kongens Lyngby,Denmark","institution_ids":["https://openalex.org/I96673099"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115904521","display_name":"Benjamin Kock Fazal","orcid":null},"institutions":[{"id":"https://openalex.org/I96673099","display_name":"Technical University of Denmark","ror":"https://ror.org/04qtj9h94","country_code":"DK","type":"education","lineage":["https://openalex.org/I96673099"]}],"countries":["DK"],"is_corresponding":false,"raw_author_name":"Benjamin Kock Fazal","raw_affiliation_strings":["Technical University of Denmark,DTU Compute,Kongens Lyngby,Denmark"],"affiliations":[{"raw_affiliation_string":"Technical University of Denmark,DTU Compute,Kongens Lyngby,Denmark","institution_ids":["https://openalex.org/I96673099"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5111021151","display_name":"Kyoung-Sook Kim","orcid":null},"institutions":[{"id":"https://openalex.org/I73613424","display_name":"National Institute of Advanced Industrial Science and Technology","ror":"https://ror.org/01703db54","country_code":"JP","type":"government","lineage":["https://openalex.org/I73613424"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Kyoung-Sook Kim","raw_affiliation_strings":["National Institute of Advanced Industrial Science and Technology,Artificial Intelligence Research Center,Tokyo,Japan"],"affiliations":[{"raw_affiliation_string":"National Institute of Advanced Industrial Science and Technology,Artificial Intelligence Research Center,Tokyo,Japan","institution_ids":["https://openalex.org/I73613424"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5014881766"],"corresponding_institution_ids":["https://openalex.org/I96673099"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.23727974,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"6092","last_page":"6100"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9900000095367432,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8198029398918152},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.614210307598114},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.5584356188774109},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.46321043372154236},{"id":"https://openalex.org/keywords/data-modeling","display_name":"Data modeling","score":0.4416424334049225},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.38399630784988403},{"id":"https://openalex.org/keywords/software-engineering","display_name":"Software engineering","score":0.09144669771194458}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8198029398918152},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.614210307598114},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.5584356188774109},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.46321043372154236},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.4416424334049225},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.38399630784988403},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.09144669771194458}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata62323.2024.10825708","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata62323.2024.10825708","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Big Data (BigData)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":48,"referenced_works":["https://openalex.org/W25566965","https://openalex.org/W2048239613","https://openalex.org/W2096733369","https://openalex.org/W2159950912","https://openalex.org/W2194775991","https://openalex.org/W2476527430","https://openalex.org/W2493916176","https://openalex.org/W2521039518","https://openalex.org/W2575343689","https://openalex.org/W2769419222","https://openalex.org/W2792643794","https://openalex.org/W2908510526","https://openalex.org/W2948163032","https://openalex.org/W2954996726","https://openalex.org/W2978017171","https://openalex.org/W2990859184","https://openalex.org/W3004612262","https://openalex.org/W3034303554","https://openalex.org/W3035140194","https://openalex.org/W3129366682","https://openalex.org/W3174637548","https://openalex.org/W3194730353","https://openalex.org/W4205922070","https://openalex.org/W4221156845","https://openalex.org/W4225631575","https://openalex.org/W4283318146","https://openalex.org/W4286921189","https://openalex.org/W4313433600","https://openalex.org/W4365456672","https://openalex.org/W4380433117","https://openalex.org/W4385573414","https://openalex.org/W4385653220","https://openalex.org/W4389475429","https://openalex.org/W4389765840","https://openalex.org/W4393145600","https://openalex.org/W4399258649","https://openalex.org/W4400341480","https://openalex.org/W6638217793","https://openalex.org/W6678914141","https://openalex.org/W6723250868","https://openalex.org/W6746311579","https://openalex.org/W6749029207","https://openalex.org/W6757817989","https://openalex.org/W6768851824","https://openalex.org/W6802128326","https://openalex.org/W6809446827","https://openalex.org/W6811106525","https://openalex.org/W6869665400"],"related_works":["https://openalex.org/W2731899572","https://openalex.org/W2961085424","https://openalex.org/W3215138031","https://openalex.org/W4306674287","https://openalex.org/W3009238340","https://openalex.org/W4360585206","https://openalex.org/W4321369474","https://openalex.org/W4285208911","https://openalex.org/W4387369504","https://openalex.org/W3082895349"],"abstract_inverted_index":{"This":[0],"paper":[1],"presents":[2],"a":[3,32,46,129],"novel":[4],"approach":[5,34,40,144],"to":[6,54,145],"optimizing":[7,150],"semantic":[8,66],"joinability":[9,56,125],"in":[10,19,92,153],"heterogeneous":[11],"data,":[12],"leveraging":[13],"embedding":[14,30,109],"techniques":[15,100],"and":[16,37,50,94,131,149],"deep":[17],"learning":[18],"the":[20,85,102,106],"context":[21],"of":[22,74,108],"big":[23],"data":[24,48,59,64,113,136,147],"environments.":[25],"We":[26],"propose":[27],"two":[28],"distinct":[29],"strategies:":[31],"text-based":[33],"using":[35,45],"DistilBERT":[36],"an":[38],"image-based":[39,103],"utilizing":[41],"ResNet50,":[42],"both":[43],"fine-tuned":[44,86,122],"triplet":[47],"structure":[49],"circle":[51],"loss":[52],"functions":[53],"enhance":[55],"predictions":[57],"from":[58],"lakes.":[60],"By":[61],"transforming":[62],"tabular":[63],"into":[65],"embeddings,":[67],"our":[68],"method":[69,104],"facilitates":[70],"more":[71],"effective":[72],"integration":[73],"large,":[75],"diverse":[76],"datasets.":[77],"Experiments":[78],"conducted":[79],"on":[80],"large-scale":[81,154],"datasets":[82],"show":[83],"that":[84,118],"models":[87],"significantly":[88],"outperform":[89],"baseline":[90],"approaches":[91],"accuracy":[93],"robustness.":[95],"Furthermore,":[96],"incorporating":[97],"computer":[98],"vision":[99],"via":[101],"demonstrates":[105],"versatility":[107],"strategies":[110],"across":[111],"different":[112],"types.":[114],"The":[115],"results":[116],"suggest":[117],"pre-trained":[119],"models,":[120],"when":[121],"for":[123,134],"specific":[124],"tasks,":[126],"can":[127],"provide":[128],"scalable":[130],"efficient":[132],"solution":[133],"extensive":[135],"integration.":[137],"Future":[138],"work":[139],"will":[140],"explore":[141],"expanding":[142],"this":[143],"additional":[146],"modalities":[148],"model":[151],"performance":[152],"applications.":[155]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
