{"id":"https://openalex.org/W2027780984","doi":"https://doi.org/10.1145/1014052.1014071","title":"Discovering complex matchings across web query interfaces","display_name":"Discovering complex matchings across web query interfaces","publication_year":2004,"publication_date":"2004-08-22","ids":{"openalex":"https://openalex.org/W2027780984","doi":"https://doi.org/10.1145/1014052.1014071","mag":"2027780984"},"language":"en","primary_location":{"id":"doi:10.1145/1014052.1014071","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1014052.1014071","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the tenth ACM SIGKDD international conference on Knowledge discovery and data mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101494989","display_name":"Bin He","orcid":"https://orcid.org/0000-0001-7385-5542"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Bin He","raw_affiliation_strings":["University of Illinois at Urbana-Champaign, IL"],"affiliations":[{"raw_affiliation_string":"University of Illinois at Urbana-Champaign, IL","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101880377","display_name":"Kevin Chen\u2013Chuan Chang","orcid":"https://orcid.org/0000-0003-0997-6803"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kevin Chen-Chuan Chang","raw_affiliation_strings":["University of Illinois at Urbana-Champaign, IL"],"affiliations":[{"raw_affiliation_string":"University of Illinois at Urbana-Champaign, IL","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5019539533","display_name":"Jiawei Han","orcid":"https://orcid.org/0000-0002-3629-2696"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jiawei Han","raw_affiliation_strings":["University of Illinois at Urbana-Champaign, IL"],"affiliations":[{"raw_affiliation_string":"University of Illinois at Urbana-Champaign, IL","institution_ids":["https://openalex.org/I157725225"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5101494989"],"corresponding_institution_ids":["https://openalex.org/I157725225"],"apc_list":null,"apc_paid":null,"fwci":37.734,"has_fulltext":false,"cited_by_count":142,"citation_normalized_percentile":{"value":0.99690381,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"148","last_page":"157"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7718609571456909},{"id":"https://openalex.org/keywords/schema-matching","display_name":"Schema matching","score":0.7195029258728027},{"id":"https://openalex.org/keywords/schema","display_name":"Schema (genetic algorithms)","score":0.7147262692451477},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5918973684310913},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.4875769019126892},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.46518272161483765},{"id":"https://openalex.org/keywords/web-query-classification","display_name":"Web query classification","score":0.4602280259132385},{"id":"https://openalex.org/keywords/the-internet","display_name":"The Internet","score":0.45672518014907837},{"id":"https://openalex.org/keywords/web-search-query","display_name":"Web search query","score":0.4345241189002991},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4059194326400757},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.3584197461605072},{"id":"https://openalex.org/keywords/data-integration","display_name":"Data integration","score":0.28118255734443665},{"id":"https://openalex.org/keywords/search-engine","display_name":"Search engine","score":0.25331443548202515},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.17330825328826904},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.13426589965820312}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7718609571456909},{"id":"https://openalex.org/C2777327318","wikidata":"https://www.wikidata.org/wiki/Q1408390","display_name":"Schema matching","level":3,"score":0.7195029258728027},{"id":"https://openalex.org/C52146309","wikidata":"https://www.wikidata.org/wiki/Q7431116","display_name":"Schema (genetic algorithms)","level":2,"score":0.7147262692451477},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5918973684310913},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.4875769019126892},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.46518272161483765},{"id":"https://openalex.org/C118689300","wikidata":"https://www.wikidata.org/wiki/Q7978614","display_name":"Web query classification","level":4,"score":0.4602280259132385},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.45672518014907837},{"id":"https://openalex.org/C164120249","wikidata":"https://www.wikidata.org/wiki/Q995982","display_name":"Web search query","level":3,"score":0.4345241189002991},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4059194326400757},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3584197461605072},{"id":"https://openalex.org/C72634772","wikidata":"https://www.wikidata.org/wiki/Q386824","display_name":"Data integration","level":2,"score":0.28118255734443665},{"id":"https://openalex.org/C97854310","wikidata":"https://www.wikidata.org/wiki/Q19541","display_name":"Search engine","level":2,"score":0.25331443548202515},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.17330825328826904},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.13426589965820312},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/1014052.1014071","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1014052.1014071","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the tenth ACM SIGKDD international conference on Knowledge discovery and data mining","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W20214369","https://openalex.org/W1586806958","https://openalex.org/W1728974518","https://openalex.org/W1969831559","https://openalex.org/W1988413436","https://openalex.org/W2008896880","https://openalex.org/W2066277072","https://openalex.org/W2089634871","https://openalex.org/W2100417212","https://openalex.org/W2105423800","https://openalex.org/W2108489852","https://openalex.org/W2123853152","https://openalex.org/W2125969310","https://openalex.org/W2139135093","https://openalex.org/W2150365753","https://openalex.org/W2166559705","https://openalex.org/W2210278139","https://openalex.org/W2221553715","https://openalex.org/W2798170591","https://openalex.org/W6678605463","https://openalex.org/W6680506379","https://openalex.org/W7057317600"],"related_works":["https://openalex.org/W2026738364","https://openalex.org/W1528218860","https://openalex.org/W2096359267","https://openalex.org/W2901901036","https://openalex.org/W2124814993","https://openalex.org/W2163573236","https://openalex.org/W1953798041","https://openalex.org/W2113390685","https://openalex.org/W3114052401","https://openalex.org/W2330073594"],"abstract_inverted_index":{"To":[0,40],"enable":[1],"information":[2],"integration,":[3],"schema":[4,53,212],"matching":[5,54,62,177],"is":[6],"a":[7,47,152,220],"critical":[8],"step":[9],"for":[10,58,243],"discovering":[11,244],"semantic":[12,112],"correspondences":[13],"of":[14,25,61,165,170,203,219],"attributes":[15,115,135],"across":[16,105],"heterogeneous":[17],"sources.":[18],"While":[19],"complex":[20,29,82,111,149,245],"matchings":[21,83,150],"are":[22,136],"common,":[23],"because":[24,139],"their":[26],"far":[27],"more":[28],"search":[30],"space,":[31],"most":[32],"existing":[33],"techniques":[34],"focus":[35,186],"on":[36,71,187],"simple":[37],"1:1":[38],"matchings.":[39,246],"tackle":[41],"this":[42,44,75],"challenge,":[43],"paper":[45],"takes":[46],"conceptually":[48],"novel":[49],"approach":[50,235],"by":[51,151],"viewing":[52],"as":[55],"correlation":[56,153,181,222],"mining,":[57],"our":[59,192,234],"task":[60],"Web":[63],"query":[64,78,106,126],"interfaces":[65,79,107,127],"to":[66,90,122,147,207,216],"integrate":[67],"the":[68,72,96,102,160,201,217,238],"myriad":[69],"databases":[70],"Internet.":[73],"On":[74],"\"deep":[76],"Web,\"":[77],"generally":[80],"form":[81],"between":[84],"attribute":[85],"groups":[86],"(e.g.,":[87,116],"[author]":[88],"corresponds":[89],"[first":[91,117],"name,":[92,118],"last":[93,119],"name]":[94],"in":[95,125,211,229],"Books":[97],"domain).":[98],"We":[99,232],"observe":[100],"that":[101],"co-occurrences":[103],"patterns":[104],"often":[108],"reveal":[109],"such":[110],"relationships:":[113],"grouping":[114],"name])":[120],"tend":[121],"be":[123],"co-present":[124],"and":[128,172,175,197,237],"thus":[129],"positively":[130],"correlated.":[131],"In":[132,156],"contrast,":[133],"synonym":[134],"negatively":[137],"correlated":[138],"they":[140],"rarely":[141],"co-occur.":[142],"This":[143,214],"insight":[144],"enables":[145],"us":[146],"discover":[148],"mining":[154,169,182],"approach.":[155],"particular,":[157],"we":[158],"develop":[159],"DCM":[161],"framework,":[162],"which":[163,184],"consists":[164],"data":[166],"preparation,":[167],"dual":[168],"positive":[171,190,196],"negative":[173,198,204],"correlations,":[174,191,199,205],"finally":[176],"selection.":[178],"Unlike":[179],"previous":[180,230],"algorithms,":[183],"mainly":[185],"finding":[188],"strong":[189],"algorithm":[193],"cares":[194],"both":[195],"especially":[200],"subtlety":[202],"due":[206],"its":[208],"special":[209],"importance":[210],"matching.":[213],"leads":[215],"introduction":[218],"new":[221],"measure,":[223],"$H$-measure,":[224],"distinct":[225],"from":[226],"those":[227],"proposed":[228],"work.":[231],"evaluate":[233],"extensively":[236],"results":[239],"show":[240],"good":[241],"accuracy":[242]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":3},{"year":2016,"cited_by_count":6},{"year":2015,"cited_by_count":3},{"year":2014,"cited_by_count":3},{"year":2013,"cited_by_count":7},{"year":2012,"cited_by_count":7}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
