{"id":"https://openalex.org/W4415382717","doi":"https://doi.org/10.1145/3772001","title":"Exploring Heterogeneous Data Lake Based on Canonical\u00a0Graphs","display_name":"Exploring Heterogeneous Data Lake Based on Canonical\u00a0Graphs","publication_year":2025,"publication_date":"2025-10-21","ids":{"openalex":"https://openalex.org/W4415382717","doi":"https://doi.org/10.1145/3772001"},"language":"en","primary_location":{"id":"doi:10.1145/3772001","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3772001","pdf_url":null,"source":{"id":"https://openalex.org/S4394735545","display_name":"ACM Transactions on Information Systems","issn_l":"1046-8188","issn":["1046-8188","1558-2868"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Information Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101899043","display_name":"Yuan Qin","orcid":"https://orcid.org/0009-0001-7123-6155"},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Qin Yuan","raw_affiliation_strings":["Beijing Institute of Technology, Beijing, China","Beijing Institute of Technology, China"],"affiliations":[{"raw_affiliation_string":"Beijing Institute of Technology, Beijing, China","institution_ids":["https://openalex.org/I125839683"]},{"raw_affiliation_string":"Beijing Institute of Technology, China","institution_ids":["https://openalex.org/I125839683"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014346487","display_name":"Ye Yuan","orcid":"https://orcid.org/0000-0002-0247-9866"},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ye Yuan","raw_affiliation_strings":["Beijing Institute of Technology, Beijing, China","Beijing Institute of Technology, China"],"affiliations":[{"raw_affiliation_string":"Beijing Institute of Technology, Beijing, China","institution_ids":["https://openalex.org/I125839683"]},{"raw_affiliation_string":"Beijing Institute of Technology, China","institution_ids":["https://openalex.org/I125839683"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081043543","display_name":"Zhenyu Wen","orcid":"https://orcid.org/0000-0002-2914-912X"},"institutions":[{"id":"https://openalex.org/I55712492","display_name":"Zhejiang University of Technology","ror":"https://ror.org/02djqfd08","country_code":"CN","type":"education","lineage":["https://openalex.org/I55712492"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhenyu Wen","raw_affiliation_strings":["Zhejiang University of Technology, Hangzhou, China","Zhejiang University of Technology, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University of Technology, Hangzhou, China","institution_ids":["https://openalex.org/I55712492"]},{"raw_affiliation_string":"Zhejiang University of Technology, China","institution_ids":["https://openalex.org/I55712492"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5054991337","display_name":"Guoren Wang","orcid":"https://orcid.org/0000-0002-0181-8379"},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guoren Wang","raw_affiliation_strings":["Beijing Institute of Technology, Beijing, China","Beijing Institute of Technology, China"],"affiliations":[{"raw_affiliation_string":"Beijing Institute of Technology, Beijing, China","institution_ids":["https://openalex.org/I125839683"]},{"raw_affiliation_string":"Beijing Institute of Technology, China","institution_ids":["https://openalex.org/I125839683"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5101899043"],"corresponding_institution_ids":["https://openalex.org/I125839683"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.35079784,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"44","issue":"2","first_page":"1","last_page":"29"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9955999851226807,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6872000098228455},{"id":"https://openalex.org/keywords/pruning","display_name":"Pruning","score":0.5666999816894531},{"id":"https://openalex.org/keywords/keyword-search","display_name":"Keyword search","score":0.5378999710083008},{"id":"https://openalex.org/keywords/schema","display_name":"Schema (genetic algorithms)","score":0.5372999906539917},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.44839999079704285},{"id":"https://openalex.org/keywords/tree","display_name":"Tree (set theory)","score":0.39169999957084656},{"id":"https://openalex.org/keywords/search-algorithm","display_name":"Search algorithm","score":0.3312000036239624},{"id":"https://openalex.org/keywords/data-integration","display_name":"Data integration","score":0.29280000925064087}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8845000267028809},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6872000098228455},{"id":"https://openalex.org/C108010975","wikidata":"https://www.wikidata.org/wiki/Q500094","display_name":"Pruning","level":2,"score":0.5666999816894531},{"id":"https://openalex.org/C2988412617","wikidata":"https://www.wikidata.org/wiki/Q7441656","display_name":"Keyword search","level":2,"score":0.5378999710083008},{"id":"https://openalex.org/C52146309","wikidata":"https://www.wikidata.org/wiki/Q7431116","display_name":"Schema (genetic algorithms)","level":2,"score":0.5372999906539917},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4846999943256378},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.44839999079704285},{"id":"https://openalex.org/C113174947","wikidata":"https://www.wikidata.org/wiki/Q2859736","display_name":"Tree (set theory)","level":2,"score":0.39169999957084656},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.38199999928474426},{"id":"https://openalex.org/C125583679","wikidata":"https://www.wikidata.org/wiki/Q755673","display_name":"Search algorithm","level":2,"score":0.3312000036239624},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.30720001459121704},{"id":"https://openalex.org/C72634772","wikidata":"https://www.wikidata.org/wiki/Q386824","display_name":"Data integration","level":2,"score":0.29280000925064087},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.2847999930381775},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.2806999981403351},{"id":"https://openalex.org/C207024777","wikidata":"https://www.wikidata.org/wiki/Q621673","display_name":"Search tree","level":3,"score":0.2768000066280365},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.2700999975204468},{"id":"https://openalex.org/C135572916","wikidata":"https://www.wikidata.org/wiki/Q193351","display_name":"Data warehouse","level":2,"score":0.2689000070095062},{"id":"https://openalex.org/C2988416141","wikidata":"https://www.wikidata.org/wiki/Q6031139","display_name":"Information loss","level":2,"score":0.2630999982357025},{"id":"https://openalex.org/C3018263672","wikidata":"https://www.wikidata.org/wiki/Q1296251","display_name":"Efficient algorithm","level":2,"score":0.2612000107765198},{"id":"https://openalex.org/C192028432","wikidata":"https://www.wikidata.org/wiki/Q845739","display_name":"Query language","level":2,"score":0.2572000026702881},{"id":"https://openalex.org/C164120249","wikidata":"https://www.wikidata.org/wiki/Q995982","display_name":"Web search query","level":3,"score":0.2558000087738037}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3772001","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3772001","pdf_url":null,"source":{"id":"https://openalex.org/S4394735545","display_name":"ACM Transactions on Information Systems","issn_l":"1046-8188","issn":["1046-8188","1558-2868"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Information Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":37,"referenced_works":["https://openalex.org/W631140850","https://openalex.org/W1481060405","https://openalex.org/W1493773376","https://openalex.org/W1519440323","https://openalex.org/W1592186521","https://openalex.org/W2045271686","https://openalex.org/W2069065514","https://openalex.org/W2078446440","https://openalex.org/W2162783807","https://openalex.org/W2164858177","https://openalex.org/W2169891106","https://openalex.org/W2576210995","https://openalex.org/W2596240654","https://openalex.org/W2884952043","https://openalex.org/W2911585132","https://openalex.org/W2970992672","https://openalex.org/W2990016312","https://openalex.org/W3001851942","https://openalex.org/W3014705052","https://openalex.org/W3018570388","https://openalex.org/W3044191208","https://openalex.org/W3123375411","https://openalex.org/W3151923855","https://openalex.org/W4213069590","https://openalex.org/W4213135654","https://openalex.org/W4224307896","https://openalex.org/W4239019441","https://openalex.org/W4248502591","https://openalex.org/W4255619200","https://openalex.org/W4285451014","https://openalex.org/W4298274365","https://openalex.org/W4298468057","https://openalex.org/W4372046852","https://openalex.org/W4380433117","https://openalex.org/W4387010298","https://openalex.org/W4390532727","https://openalex.org/W4399152810"],"related_works":[],"abstract_inverted_index":{"A":[0],"data":[1,8,11,22,44,58,70,84,139],"lake":[2,71],"maintains":[3],"large":[4],"amounts":[5],"of":[6,38,112,180],"heterogeneous":[7,21,43],"with":[9,155],"different":[10,63,105,141,166],"schemas":[12],"and":[13,18,91,178],"query":[14,119],"interfaces.":[15],"Efficiently":[16],"querying":[17],"analyzing":[19],"the":[20,52,69,74,83,102,109,118,136,146,175,181],"enables":[23],"users":[24],"to":[25,73,134],"gain":[26],"more":[27],"complete":[28],"insights.":[29],"In":[30],"this":[31],"article,":[32],"we":[33,86,121],"study":[34],"a":[35,94,123],"novel":[36,124],"problem":[37],"distributed":[39,47],"keyword":[40,80,126],"search":[41,48,81,96,127],"across":[42,104,165],"sources.":[45,142,167],"Traditional":[46],"algorithms":[49],"generally":[50],"require":[51],"predefined":[53],"crossing":[54],"edges":[55],"connecting":[56],"relevant":[57,138],"instances":[59],"for":[60,68],"communication":[61],"between":[62,140],"sources,":[64],"which":[65,100,131],"is":[66],"unpractical":[67],"due":[72],"schema":[75],"heterogeneity.":[76],"To":[77,115],"effectively":[78],"perform":[79],"over":[82],"lake,":[85],"first":[87],"introduce":[88],"canonical":[89],"graphs":[90],"then":[92],"develop":[93],"best-first":[95],"algorithm":[97,128],"called":[98,129],"UnifySea,":[99],"explores":[101],"answers":[103,148],"sources":[106],"based":[107,149],"on":[108,150,170],"unified":[110],"identification":[111],"related":[113],"instances.":[114],"further":[116],"improve":[117],"efficiency,":[120,177],"propose":[122],"incremental":[125],"DistSea,":[130],"just":[132],"need":[133],"identify":[135],"promising":[137],"DistSea":[143,160],"incrementally":[144],"calculates":[145],"optimal":[147],"locally":[151],"partial":[152],"evaluation.":[153],"Equipped":[154],"several":[156],"efficient":[157],"pruning":[158],"rules,":[159],"reduces":[161],"unpromising":[162],"tree":[163],"calculation":[164],"Experimental":[168],"evaluations":[169],"six":[171],"real-world":[172],"benchmarks":[173],"demonstrate":[174],"effectiveness,":[176],"scalability":[179],"proposed":[182],"algorithms.":[183]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-22T00:00:00"}
