{"id":"https://openalex.org/W4409901427","doi":"https://doi.org/10.1007/s00778-025-00917-9","title":"Table integration in data lakes unleashed: pairwise integrability judgment, integrable set discovery, and multi-tuple conflict resolution","display_name":"Table integration in data lakes unleashed: pairwise integrability judgment, integrable set discovery, and multi-tuple conflict resolution","publication_year":2025,"publication_date":"2025-04-28","ids":{"openalex":"https://openalex.org/W4409901427","doi":"https://doi.org/10.1007/s00778-025-00917-9"},"language":"en","primary_location":{"id":"doi:10.1007/s00778-025-00917-9","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s00778-025-00917-9","pdf_url":"https://link.springer.com/content/pdf/10.1007/s00778-025-00917-9.pdf","source":{"id":"https://openalex.org/S78926909","display_name":"The VLDB Journal","issn_l":"0949-877X","issn":["0949-877X","1066-8888"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The VLDB Journal","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/s00778-025-00917-9.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5047487326","display_name":"Daomin Ji","orcid":"https://orcid.org/0009-0000-0037-3614"},"institutions":[{"id":"https://openalex.org/I82951845","display_name":"RMIT University","ror":"https://ror.org/04ttjf776","country_code":"AU","type":"education","lineage":["https://openalex.org/I82951845"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Daomin Ji","raw_affiliation_strings":["RMIT University, Melbourne, Australia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"RMIT University, Melbourne, Australia","institution_ids":["https://openalex.org/I82951845"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037728584","display_name":"Hui Luo","orcid":"https://orcid.org/0000-0002-7299-031X"},"institutions":[{"id":"https://openalex.org/I204824540","display_name":"University of Wollongong","ror":"https://ror.org/00jtmb277","country_code":"AU","type":"education","lineage":["https://openalex.org/I204824540"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Hui Luo","raw_affiliation_strings":["University of Wollongong, Wollongong, Australia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Wollongong, Wollongong, Australia","institution_ids":["https://openalex.org/I204824540"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080660416","display_name":"Zhifeng Bao","orcid":"https://orcid.org/0000-0003-2477-381X"},"institutions":[{"id":"https://openalex.org/I82951845","display_name":"RMIT University","ror":"https://ror.org/04ttjf776","country_code":"AU","type":"education","lineage":["https://openalex.org/I82951845"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Zhifeng Bao","raw_affiliation_strings":["RMIT University, Melbourne, Australia"],"raw_orcid":"https://orcid.org/0000-0003-2477-381X","affiliations":[{"raw_affiliation_string":"RMIT University, Melbourne, Australia","institution_ids":["https://openalex.org/I82951845"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5037010515","display_name":"J. Shane Culpepper","orcid":null},"institutions":[{"id":"https://openalex.org/I165143802","display_name":"The University of Queensland","ror":"https://ror.org/00rqy9422","country_code":"AU","type":"education","lineage":["https://openalex.org/I165143802"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"J. Shane Culpepper","raw_affiliation_strings":["The University of Queensland, Brisbane, Australia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The University of Queensland, Brisbane, Australia","institution_ids":["https://openalex.org/I165143802"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5047487326"],"corresponding_institution_ids":["https://openalex.org/I82951845"],"apc_list":{"value":2290,"currency":"EUR","value_usd":2890},"apc_paid":{"value":2290,"currency":"EUR","value_usd":2890},"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.09750439,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"34","issue":"3","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/tuple","display_name":"Tuple","score":0.7743415236473083},{"id":"https://openalex.org/keywords/pairwise-comparison","display_name":"Pairwise comparison","score":0.7132564783096313},{"id":"https://openalex.org/keywords/table","display_name":"Table (database)","score":0.6015763878822327},{"id":"https://openalex.org/keywords/integrable-system","display_name":"Integrable system","score":0.5436375141143799},{"id":"https://openalex.org/keywords/resolution","display_name":"Resolution (logic)","score":0.540496826171875},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.509212851524353},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.45845454931259155},{"id":"https://openalex.org/keywords/conflict-resolution","display_name":"Conflict resolution","score":0.43956056237220764},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.41196101903915405},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3823891878128052},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.36761653423309326},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.26393887400627136},{"id":"https://openalex.org/keywords/discrete-mathematics","display_name":"Discrete mathematics","score":0.12135311961174011},{"id":"https://openalex.org/keywords/pure-mathematics","display_name":"Pure mathematics","score":0.09801405668258667},{"id":"https://openalex.org/keywords/sociology","display_name":"Sociology","score":0.08341458439826965},{"id":"https://openalex.org/keywords/social-science","display_name":"Social science","score":0.06420168280601501}],"concepts":[{"id":"https://openalex.org/C118930307","wikidata":"https://www.wikidata.org/wiki/Q600590","display_name":"Tuple","level":2,"score":0.7743415236473083},{"id":"https://openalex.org/C184898388","wikidata":"https://www.wikidata.org/wiki/Q1435712","display_name":"Pairwise comparison","level":2,"score":0.7132564783096313},{"id":"https://openalex.org/C45235069","wikidata":"https://www.wikidata.org/wiki/Q278425","display_name":"Table (database)","level":2,"score":0.6015763878822327},{"id":"https://openalex.org/C200741047","wikidata":"https://www.wikidata.org/wiki/Q1957758","display_name":"Integrable system","level":2,"score":0.5436375141143799},{"id":"https://openalex.org/C138268822","wikidata":"https://www.wikidata.org/wiki/Q1051925","display_name":"Resolution (logic)","level":2,"score":0.540496826171875},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.509212851524353},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.45845454931259155},{"id":"https://openalex.org/C21711469","wikidata":"https://www.wikidata.org/wiki/Q1194317","display_name":"Conflict resolution","level":2,"score":0.43956056237220764},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.41196101903915405},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3823891878128052},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.36761653423309326},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.26393887400627136},{"id":"https://openalex.org/C118615104","wikidata":"https://www.wikidata.org/wiki/Q121416","display_name":"Discrete mathematics","level":1,"score":0.12135311961174011},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.09801405668258667},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.08341458439826965},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.06420168280601501},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1007/s00778-025-00917-9","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s00778-025-00917-9","pdf_url":"https://link.springer.com/content/pdf/10.1007/s00778-025-00917-9.pdf","source":{"id":"https://openalex.org/S78926909","display_name":"The VLDB Journal","issn_l":"0949-877X","issn":["0949-877X","1066-8888"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The VLDB Journal","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1007/s00778-025-00917-9","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s00778-025-00917-9","pdf_url":"https://link.springer.com/content/pdf/10.1007/s00778-025-00917-9.pdf","source":{"id":"https://openalex.org/S78926909","display_name":"The VLDB Journal","issn_l":"0949-877X","issn":["0949-877X","1066-8888"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The VLDB Journal","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G6393225816","display_name":null,"funder_award_id":"DP240101211","funder_id":"https://openalex.org/F4320334704","funder_display_name":"Australian Research Council"}],"funders":[{"id":"https://openalex.org/F4320334704","display_name":"Australian Research Council","ror":"https://ror.org/05mmh0f86"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4409901427.pdf","grobid_xml":"https://content.openalex.org/works/W4409901427.grobid-xml"},"referenced_works_count":53,"referenced_works":["https://openalex.org/W1521736627","https://openalex.org/W2012668444","https://openalex.org/W2081580037","https://openalex.org/W2089492940","https://openalex.org/W2095293504","https://openalex.org/W2111625757","https://openalex.org/W2131681506","https://openalex.org/W2164456230","https://openalex.org/W2164998314","https://openalex.org/W2165822541","https://openalex.org/W2217748804","https://openalex.org/W2250539671","https://openalex.org/W2295240344","https://openalex.org/W2368253142","https://openalex.org/W2404544029","https://openalex.org/W2424304400","https://openalex.org/W2428834396","https://openalex.org/W2493916176","https://openalex.org/W2537388716","https://openalex.org/W2612177096","https://openalex.org/W2612344091","https://openalex.org/W2616268242","https://openalex.org/W2775293231","https://openalex.org/W2808345493","https://openalex.org/W2889326796","https://openalex.org/W2891828758","https://openalex.org/W2905441753","https://openalex.org/W2948163032","https://openalex.org/W2971681342","https://openalex.org/W2995201943","https://openalex.org/W3029701880","https://openalex.org/W3093671642","https://openalex.org/W3099768174","https://openalex.org/W3103177583","https://openalex.org/W3106020963","https://openalex.org/W3161956575","https://openalex.org/W3174828871","https://openalex.org/W3176923149","https://openalex.org/W3180181113","https://openalex.org/W4250589301","https://openalex.org/W4281721601","https://openalex.org/W4282010430","https://openalex.org/W4286217654","https://openalex.org/W4301014524","https://openalex.org/W4317039121","https://openalex.org/W4379390735","https://openalex.org/W4385567149","https://openalex.org/W4386591587","https://openalex.org/W4397029689","https://openalex.org/W6600195168","https://openalex.org/W6600424091","https://openalex.org/W6815311649","https://openalex.org/W6834485778"],"related_works":["https://openalex.org/W2044787408","https://openalex.org/W4249228129","https://openalex.org/W2086585817","https://openalex.org/W4394635378","https://openalex.org/W2019140719","https://openalex.org/W155593081","https://openalex.org/W3104927070","https://openalex.org/W1506619710","https://openalex.org/W2738038633","https://openalex.org/W574916204"],"abstract_inverted_index":{"Abstract":[0],"Table":[1],"integration":[2],"aims":[3],"to":[4,102,127,138,168,179,185,194,218,279],"create":[5],"a":[6,27,43,69,99,121,158],"comprehensive":[7],"table":[8,70,172],"by":[9],"consolidating":[10],"tuples":[11,91,169],"containing":[12],"relevant":[13],"information.":[14],"In":[15],"this":[16,95],"work,":[17],"we":[18,97,119,200,258],"investigate":[19,180],"the":[20,78,104,111,145,171,187,211,232,281,289],"challenge":[21],"of":[22,52,106,113,147,285,291],"integrating":[23,225,292],"multiple":[24,90,226],"tables":[25,293],"from":[26],"data":[28,115,117,132,295],"lake,":[29],"focusing":[30],"on":[31,72,210,276],"three":[32],"core":[33],"tasks:":[34],"(1)":[35],"pairwise":[36,73,107,148,175],"integrability":[37,74,108,149],"judgment":[38],",":[39,62,85,199],"which":[40,63,86,130],"determines":[41],"whether":[42],"tuple":[44],"pair":[45],"is":[46],"integrable,":[47],"accounting":[48],"for":[49,234,241,255],"any":[50],"occurrences":[51],"semantic":[53],"equivalence":[54],"or":[55],"typographical":[56],"errors;":[57],"(2)":[58],"integrable":[59,66,152,188],"set":[60,153,189],"discovery":[61,190],"identifies":[64],"all":[65],"sets":[67],"in":[68,77,116,170,288],"based":[71],"judgments":[75],"established":[76],"first":[79],"task;":[80],"(3)":[81],"multi-tuple":[82,196],"conflict":[83,197],"resolution":[84,198],"resolves":[87],"conflicts":[88,221],"between":[89],"during":[92],"integration.":[93],"To":[94],"end,":[96],"train":[98],"binary":[100],"classifier":[101],"address":[103,186],"task":[105],"judgment.":[109],"Given":[110],"scarcity":[112],"labeled":[114,244],"lakes,":[118],"propose":[120],"self-supervised":[122],"adversarial":[123,136],"contrastive":[124],"learning":[125,205],"algorithm":[126],"perform":[128],"classification,":[129],"incorporates":[131],"augmentation":[133],"methods":[134],"and":[135,165,173,269,283],"examples":[137],"autonomously":[139],"generate":[140],"new":[141],"training":[142],"data.":[143],"Upon":[144],"output":[146],"judgment,":[150],"each":[151],"can":[154],"be":[155],"considered":[156],"as":[157],"community\u2014a":[159],"densely":[160],"connected":[161],"sub-graph":[162],"where":[163,243],"nodes":[164],"edges":[166],"correspond":[167],"their":[174],"integrability,":[176],"respectively\u2014we":[177],"proceed":[178],"various":[181],"community":[182],"detection":[183],"algorithms":[184],"objective.":[191],"Moving":[192],"forward":[193],"tackle":[195],"introduce":[201],"an":[202],"innovative":[203],"in-context":[204],"methodology.":[206],"This":[207],"approach":[208],"capitalizes":[209],"knowledge":[212],"embedded":[213],"within":[214,294],"large":[215],"language":[216],"models":[217],"effectively":[219],"resolve":[220],"that":[222],"arise":[223],"when":[224],"tuples.":[227],"Notably,":[228],"our":[229,256,260,286],"method":[230],"minimizes":[231],"need":[233],"annotated":[235],"data,":[236],"making":[237],"it":[238],"particularly":[239],"suited":[240],"scenarios":[242],"datasets":[245],"are":[246,253],"scarce.":[247],"Since":[248],"no":[249],"suitable":[250],"test":[251],"collections":[252],"available":[254],"tasks,":[257],"develop":[259],"own":[261],"benchmarks":[262,278],"using":[263],"two":[264],"real-world":[265],"dataset":[266],"repositories:":[267],"Real":[268],"Join":[270],".":[271],"We":[272],"conduct":[273],"extensive":[274],"experiments":[275],"these":[277],"validate":[280],"robustness":[282],"applicability":[284],"methodologies":[287],"context":[290],"lakes.":[296]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
