{"id":"https://openalex.org/W2911280950","doi":"https://doi.org/10.14778/3291264.3291272","title":"Smurf","display_name":"Smurf","publication_year":2018,"publication_date":"2018-11-01","ids":{"openalex":"https://openalex.org/W2911280950","doi":"https://doi.org/10.14778/3291264.3291272","mag":"2911280950"},"language":"en","primary_location":{"id":"doi:10.14778/3291264.3291272","is_oa":false,"landing_page_url":"https://doi.org/10.14778/3291264.3291272","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101047660","display_name":"Paul Suganthan G. C.","orcid":null},"institutions":[{"id":"https://openalex.org/I135310074","display_name":"University of Wisconsin\u2013Madison","ror":"https://ror.org/01y2jtd41","country_code":"US","type":"education","lineage":["https://openalex.org/I135310074"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Paul Suganthan G. C.","raw_affiliation_strings":["University of Wisconsin-Madison"],"affiliations":[{"raw_affiliation_string":"University of Wisconsin-Madison","institution_ids":["https://openalex.org/I135310074"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086032713","display_name":"Adel Ardalan","orcid":"https://orcid.org/0000-0001-5061-7832"},"institutions":[{"id":"https://openalex.org/I135310074","display_name":"University of Wisconsin\u2013Madison","ror":"https://ror.org/01y2jtd41","country_code":"US","type":"education","lineage":["https://openalex.org/I135310074"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Adel Ardalan","raw_affiliation_strings":["University of Wisconsin-Madison"],"affiliations":[{"raw_affiliation_string":"University of Wisconsin-Madison","institution_ids":["https://openalex.org/I135310074"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110256670","display_name":"AnHai Doan","orcid":null},"institutions":[{"id":"https://openalex.org/I135310074","display_name":"University of Wisconsin\u2013Madison","ror":"https://ror.org/01y2jtd41","country_code":"US","type":"education","lineage":["https://openalex.org/I135310074"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"AnHai Doan","raw_affiliation_strings":["University of Wisconsin-Madison"],"affiliations":[{"raw_affiliation_string":"University of Wisconsin-Madison","institution_ids":["https://openalex.org/I135310074"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5035329776","display_name":"Aditya Akella","orcid":"https://orcid.org/0000-0002-5920-170X"},"institutions":[{"id":"https://openalex.org/I135310074","display_name":"University of Wisconsin\u2013Madison","ror":"https://ror.org/01y2jtd41","country_code":"US","type":"education","lineage":["https://openalex.org/I135310074"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Aditya Akella","raw_affiliation_strings":["University of Wisconsin-Madison"],"affiliations":[{"raw_affiliation_string":"University of Wisconsin-Madison","institution_ids":["https://openalex.org/I135310074"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5101047660"],"corresponding_institution_ids":["https://openalex.org/I135310074"],"apc_list":null,"apc_paid":null,"fwci":1.694,"has_fulltext":false,"cited_by_count":16,"citation_normalized_percentile":{"value":0.8624936,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"12","issue":"3","first_page":"278","last_page":"291"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9939000010490417,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9894000291824341,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7540583610534668},{"id":"https://openalex.org/keywords/service","display_name":"Service (business)","score":0.6785672307014465},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6767269372940063},{"id":"https://openalex.org/keywords/string","display_name":"String (physics)","score":0.6004059910774231},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5423761606216431},{"id":"https://openalex.org/keywords/reuse","display_name":"Reuse","score":0.5163775682449341},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.4803689420223236},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.47748303413391113},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.3421556353569031},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.2982897162437439},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.20856326818466187},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.10090023279190063},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.09109985828399658}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7540583610534668},{"id":"https://openalex.org/C2780378061","wikidata":"https://www.wikidata.org/wiki/Q25351891","display_name":"Service (business)","level":2,"score":0.6785672307014465},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6767269372940063},{"id":"https://openalex.org/C157486923","wikidata":"https://www.wikidata.org/wiki/Q1376436","display_name":"String (physics)","level":2,"score":0.6004059910774231},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5423761606216431},{"id":"https://openalex.org/C206588197","wikidata":"https://www.wikidata.org/wiki/Q846574","display_name":"Reuse","level":2,"score":0.5163775682449341},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.4803689420223236},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.47748303413391113},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3421556353569031},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.2982897162437439},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.20856326818466187},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.10090023279190063},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.09109985828399658},{"id":"https://openalex.org/C136264566","wikidata":"https://www.wikidata.org/wiki/Q159810","display_name":"Economy","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C548081761","wikidata":"https://www.wikidata.org/wiki/Q180388","display_name":"Waste management","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C37914503","wikidata":"https://www.wikidata.org/wiki/Q156495","display_name":"Mathematical physics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.14778/3291264.3291272","is_oa":false,"landing_page_url":"https://doi.org/10.14778/3291264.3291272","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7099999785423279,"id":"https://metadata.un.org/sdg/15","display_name":"Life in Land"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":52,"referenced_works":["https://openalex.org/W1483135265","https://openalex.org/W1547612978","https://openalex.org/W1983058606","https://openalex.org/W1990947147","https://openalex.org/W1995099886","https://openalex.org/W2001700730","https://openalex.org/W2002597960","https://openalex.org/W2008183828","https://openalex.org/W2028431844","https://openalex.org/W2031250218","https://openalex.org/W2032775418","https://openalex.org/W2044102377","https://openalex.org/W2058978608","https://openalex.org/W2067497031","https://openalex.org/W2082366447","https://openalex.org/W2096598900","https://openalex.org/W2097184821","https://openalex.org/W2097776316","https://openalex.org/W2098416578","https://openalex.org/W2099485165","https://openalex.org/W2101207848","https://openalex.org/W2104511295","https://openalex.org/W2104599107","https://openalex.org/W2105436061","https://openalex.org/W2106105896","https://openalex.org/W2107966677","https://openalex.org/W2108991785","https://openalex.org/W2121516976","https://openalex.org/W2125816831","https://openalex.org/W2127675794","https://openalex.org/W2148148676","https://openalex.org/W2151930506","https://openalex.org/W2161936973","https://openalex.org/W2166400748","https://openalex.org/W2270660075","https://openalex.org/W2284514301","https://openalex.org/W2286724461","https://openalex.org/W2396588571","https://openalex.org/W2542998387","https://openalex.org/W2612526608","https://openalex.org/W2612732335","https://openalex.org/W2752274179","https://openalex.org/W2766530067","https://openalex.org/W2798412430","https://openalex.org/W2811488946","https://openalex.org/W2883780523","https://openalex.org/W2889284307","https://openalex.org/W2911964244","https://openalex.org/W3099883947","https://openalex.org/W6639274278","https://openalex.org/W6674576723","https://openalex.org/W6683401941"],"related_works":["https://openalex.org/W2000444236","https://openalex.org/W2384475851","https://openalex.org/W2389214306","https://openalex.org/W2353602216","https://openalex.org/W2367078749","https://openalex.org/W2965083567","https://openalex.org/W4235240664","https://openalex.org/W2381798600","https://openalex.org/W1838576100","https://openalex.org/W2757182831"],"abstract_inverted_index":{"We":[0,21,74],"argue":[1],"that":[2,23,81],"more":[3,38],"attention":[4],"should":[5],"be":[6,32],"devoted":[7],"to":[8,34,51,96,104,131],"developing":[9],"self-service":[10,26,42,78,145,153],"string":[11,54],"matching":[12,28],"(SM)":[13],"solutions,":[14],"which":[15],"lay":[16,49,118],"users":[17,50],"can":[18,31,64],"easily":[19],"use.":[20],"show":[22],"Falcon,":[24],"a":[25,71,77,101,107],"entity":[27],"(EM)":[29],"solution,":[30],"applied":[33],"SM":[35,43,79,146],"and":[36,69,147,155],"is":[37,62,100],"accurate":[39],"than":[40],"current":[41],"solutions.":[44],"However,":[45],"Falcon":[46],"often":[47],"asks":[48],"label":[52],"many":[53],"pairs":[55],"(e.g.,":[56],"770-1050":[57],"in":[58,137],"our":[59],"experiments).":[60],"This":[61,125],"expensive,":[63],"significantly":[65,143],"compound":[66],"labeling":[67,84],"mistakes,":[68],"takes":[70],"long":[72],"time.":[73],"developed":[75],"Smurf,":[76],"solution":[80,103,126],"reduces":[82],"the":[83,117,135,138],"effort":[85],"by":[86],"43-76%,":[87],"yet":[88],"achieves":[89],"comparable":[90],"F":[91],"1":[92],"accuracy.":[93],"The":[94],"key":[95],"make":[97],"Smurf":[98,111,142],"possible":[99],"novel":[102],"efficiently":[105],"execute":[106],"random":[108,157],"forest":[109,158],"(that":[110],"learns":[112],"via":[113],"active":[114],"learning":[115],"with":[116],"user)":[119],"over":[120,160],"two":[121],"sets":[122],"of":[123],"strings.":[124],"uses":[127],"RDBMS-style":[128],"plan":[129],"optimization":[130],"reuse":[132],"computations":[133],"across":[134],"trees":[136],"forest.":[139],"As":[140],"such,":[141],"advances":[144],"raises":[148],"interesting":[149],"future":[150],"directions":[151],"for":[152],"EM":[154],"scalable":[156],"execution":[159],"structured":[161],"data.":[162]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2019-02-21T00:00:00"}
