{"id":"https://openalex.org/W4400909727","doi":"https://doi.org/10.1109/icde60146.2024.00022","title":"Efficiently Estimating Mutual Information Between Attributes Across Tables","display_name":"Efficiently Estimating Mutual Information Between Attributes Across Tables","publication_year":2024,"publication_date":"2024-05-13","ids":{"openalex":"https://openalex.org/W4400909727","doi":"https://doi.org/10.1109/icde60146.2024.00022"},"language":"en","primary_location":{"id":"doi:10.1109/icde60146.2024.00022","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icde60146.2024.00022","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE 40th International Conference on Data Engineering (ICDE)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101972578","display_name":"A\u00e9cio Santos","orcid":"https://orcid.org/0000-0002-5124-7770"},"institutions":[{"id":"https://openalex.org/I57206974","display_name":"New York University","ror":"https://ror.org/0190ak572","country_code":"US","type":"education","lineage":["https://openalex.org/I57206974"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"A\u00e9cio Santos","raw_affiliation_strings":["New York University"],"affiliations":[{"raw_affiliation_string":"New York University","institution_ids":["https://openalex.org/I57206974"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110197814","display_name":"Flip Korn","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Flip Korn","raw_affiliation_strings":["Google Research"],"affiliations":[{"raw_affiliation_string":"Google Research","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5006773757","display_name":"Juliana Freire","orcid":"https://orcid.org/0000-0003-3915-7075"},"institutions":[{"id":"https://openalex.org/I57206974","display_name":"New York University","ror":"https://ror.org/0190ak572","country_code":"US","type":"education","lineage":["https://openalex.org/I57206974"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Juliana Freire","raw_affiliation_strings":["New York University"],"affiliations":[{"raw_affiliation_string":"New York University","institution_ids":["https://openalex.org/I57206974"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5101972578"],"corresponding_institution_ids":["https://openalex.org/I57206974"],"apc_list":null,"apc_paid":null,"fwci":0.8142,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.77258061,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"193","last_page":"206"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.8920999765396118,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.8920999765396118,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11303","display_name":"Bayesian Modeling and Causal Inference","score":0.8195000290870667,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10057","display_name":"Face and Expression Recognition","score":0.7982000112533569,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/mutual-information","display_name":"Mutual information","score":0.7700045108795166},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6680149435997009},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3817474842071533},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3561035096645355},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.24419823288917542}],"concepts":[{"id":"https://openalex.org/C152139883","wikidata":"https://www.wikidata.org/wiki/Q252973","display_name":"Mutual information","level":2,"score":0.7700045108795166},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6680149435997009},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3817474842071533},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3561035096645355},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.24419823288917542}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icde60146.2024.00022","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icde60146.2024.00022","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE 40th International Conference on Data Engineering (ICDE)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1288460465","display_name":null,"funder_award_id":"ISS-2106888","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320306078","display_name":"U.S. Department of Defense","ror":"https://ror.org/0447fe631"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":74,"referenced_works":["https://openalex.org/W1635484243","https://openalex.org/W1969621019","https://openalex.org/W1981491391","https://openalex.org/W1989151402","https://openalex.org/W1992363839","https://openalex.org/W2020584928","https://openalex.org/W2024472792","https://openalex.org/W2067211778","https://openalex.org/W2082290707","https://openalex.org/W2085845250","https://openalex.org/W2092939357","https://openalex.org/W2095096988","https://openalex.org/W2104157166","https://openalex.org/W2113051562","https://openalex.org/W2114771311","https://openalex.org/W2118561568","https://openalex.org/W2119885577","https://openalex.org/W2142357860","https://openalex.org/W2153406069","https://openalex.org/W2159222783","https://openalex.org/W2167101736","https://openalex.org/W2243803726","https://openalex.org/W2444650685","https://openalex.org/W2612048434","https://openalex.org/W2618188015","https://openalex.org/W2620813839","https://openalex.org/W2742990887","https://openalex.org/W2750765126","https://openalex.org/W2767724860","https://openalex.org/W2795089200","https://openalex.org/W2798664493","https://openalex.org/W2926805670","https://openalex.org/W2948163032","https://openalex.org/W2950817225","https://openalex.org/W2962979766","https://openalex.org/W2963174348","https://openalex.org/W2965870078","https://openalex.org/W2977730413","https://openalex.org/W3000710917","https://openalex.org/W3003748944","https://openalex.org/W3034219587","https://openalex.org/W3037852608","https://openalex.org/W3080831182","https://openalex.org/W3081465174","https://openalex.org/W3105524694","https://openalex.org/W3139834496","https://openalex.org/W3139909695","https://openalex.org/W3174810817","https://openalex.org/W3175111921","https://openalex.org/W3176950644","https://openalex.org/W3196904276","https://openalex.org/W3215670116","https://openalex.org/W4224951911","https://openalex.org/W4231760708","https://openalex.org/W4232091442","https://openalex.org/W4283383705","https://openalex.org/W4284958237","https://openalex.org/W4289236186","https://openalex.org/W4289533971","https://openalex.org/W4289533982","https://openalex.org/W4291714342","https://openalex.org/W4379135640","https://openalex.org/W4379135712","https://openalex.org/W4379390291","https://openalex.org/W4379390557","https://openalex.org/W4385270264","https://openalex.org/W4385270337","https://openalex.org/W4401352200","https://openalex.org/W6645423673","https://openalex.org/W6680940449","https://openalex.org/W6744238343","https://openalex.org/W6755987414","https://openalex.org/W6780093468","https://openalex.org/W6791784704"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2466816617","https://openalex.org/W1970834875","https://openalex.org/W842936808","https://openalex.org/W3174028392","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W2000517284","https://openalex.org/W4396701345"],"abstract_inverted_index":{"Relational":[0],"data":[1,9,43],"augmentation":[2],"is":[3,24],"a":[4,35,109,130],"powerful":[5],"technique":[6],"for":[7,102],"enhancing":[8],"analytics":[10],"and":[11,128,158],"improving":[12],"machine":[13],"learning":[14],"models":[15],"by":[16,92,121],"incorporating":[17],"columns":[18],"from":[19,50,66],"external":[20,30,51],"datasets.":[21,160],"However,":[22,59],"it":[23],"challenging":[25],"to":[26,32,46,75,139],"efficiently":[27],"discover":[28],"relevant":[29,104],"tables":[31,49,64,134],"join":[33],"with":[34],"given":[36],"input":[37],"table.":[38],"Existing":[39],"approaches":[40],"rely":[41],"on":[42,55],"discovery":[44,119],"systems":[45,68],"identify":[47],"\u201cjoinable\u201d":[48],"sources,":[52],"typically":[53],"based":[54],"overlap":[56],"or":[57,83],"containment.":[58],"the":[60,94,126,145],"sheer":[61],"number":[62],"of":[63,96,117,133,147],"obtained":[65],"these":[67],"results":[69],"in":[70,86,153],"irrelevant":[71],"joins":[72,127],"that":[73,113,135],"need":[74],"be":[76,80,140],"performed;":[77],"this":[78,90],"can":[79],"computationally":[81],"expensive":[82],"even":[84],"infeasible":[85],"practice.":[87],"We":[88,107,142],"address":[89],"limitation":[91],"proposing":[93],"use":[95],"efficient":[97,115],"mutual":[98],"information":[99],"(MI)":[100],"estimation":[101],"finding":[103],"joinable":[105],"tables.":[106],"introduce":[108],"new":[110],"sketching":[111],"method":[112],"enables":[114],"evaluation":[116],"relationship":[118],"queries":[120],"estimating":[122],"MI":[123,152],"without":[124],"materializing":[125],"returning":[129],"smaller":[131],"set":[132],"are":[136],"more":[137],"likely":[138],"relevant.":[141],"also":[143],"demonstrate":[144],"effectiveness":[146],"our":[148],"approach":[149],"at":[150],"approximating":[151],"extensive":[154],"experiments":[155],"using":[156],"synthetic":[157],"real-world":[159]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-12-21T23:12:01.093139","created_date":"2025-10-10T00:00:00"}
