{"id":"https://openalex.org/W4311630552","doi":"https://doi.org/10.3390/info13120568","title":"Incremental Entity Blocking over Heterogeneous Streaming Data","display_name":"Incremental Entity Blocking over Heterogeneous Streaming Data","publication_year":2022,"publication_date":"2022-12-05","ids":{"openalex":"https://openalex.org/W4311630552","doi":"https://doi.org/10.3390/info13120568"},"language":"en","primary_location":{"id":"doi:10.3390/info13120568","is_oa":true,"landing_page_url":"https://doi.org/10.3390/info13120568","pdf_url":"https://www.mdpi.com/2078-2489/13/12/568/pdf?version=1670504041","source":{"id":"https://openalex.org/S4210219776","display_name":"Information","issn_l":"2078-2489","issn":["2078-2489"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Information","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/2078-2489/13/12/568/pdf?version=1670504041","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5040142167","display_name":"Tiago Brasileiro Ara\u00fajo","orcid":"https://orcid.org/0000-0001-6339-9117"},"institutions":[{"id":"https://openalex.org/I166825849","display_name":"Tampere University","ror":"https://ror.org/033003e23","country_code":"FI","type":"education","lineage":["https://openalex.org/I166825849"]},{"id":"https://openalex.org/I3018006580","display_name":"Instituto Federal de Educa\u00e7\u00e3o Ci\u00eancia e Tecnologia da Para\u00edba","ror":"https://ror.org/01xc5jm57","country_code":"BR","type":"education","lineage":["https://openalex.org/I3018006580"]},{"id":"https://openalex.org/I41455075","display_name":"Universidade Federal de Campina Grande","ror":"https://ror.org/00eftnx64","country_code":"BR","type":"education","lineage":["https://openalex.org/I41455075"]},{"id":"https://openalex.org/I4210112416","display_name":"Instituto Federal de Educa\u00e7\u00e3o, Ci\u00eancia e Tecnologia do Par\u00e1","ror":"https://ror.org/02239nd21","country_code":"BR","type":"education","lineage":["https://openalex.org/I1293487690","https://openalex.org/I2801200668","https://openalex.org/I4210112416"]}],"countries":["BR","FI"],"is_corresponding":true,"raw_author_name":"Tiago Brasileiro Ara\u00fajo","raw_affiliation_strings":["Academic Unit of Systems and Computing, Federal University of Campina Grande, Campina Grande 58429-900, Brazil","Faculty of Information Technology and Communication Sciences, Tampere University, 33100 Tampere, Finland","Federal Institute of Para\u00edba, Monteiro 58500-000, Brazil"],"raw_orcid":"https://orcid.org/0000-0001-6339-9117","affiliations":[{"raw_affiliation_string":"Academic Unit of Systems and Computing, Federal University of Campina Grande, Campina Grande 58429-900, Brazil","institution_ids":["https://openalex.org/I41455075"]},{"raw_affiliation_string":"Faculty of Information Technology and Communication Sciences, Tampere University, 33100 Tampere, Finland","institution_ids":["https://openalex.org/I166825849"]},{"raw_affiliation_string":"Federal Institute of Para\u00edba, Monteiro 58500-000, Brazil","institution_ids":["https://openalex.org/I3018006580","https://openalex.org/I4210112416"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042740371","display_name":"Kostas Stefanidis","orcid":"https://orcid.org/0000-0003-1317-8062"},"institutions":[{"id":"https://openalex.org/I166825849","display_name":"Tampere University","ror":"https://ror.org/033003e23","country_code":"FI","type":"education","lineage":["https://openalex.org/I166825849"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Kostas Stefanidis","raw_affiliation_strings":["Faculty of Information Technology and Communication Sciences, Tampere University, 33100 Tampere, Finland"],"raw_orcid":"https://orcid.org/0000-0003-1317-8062","affiliations":[{"raw_affiliation_string":"Faculty of Information Technology and Communication Sciences, Tampere University, 33100 Tampere, Finland","institution_ids":["https://openalex.org/I166825849"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040330943","display_name":"Carlos Eduardo Santos Pires","orcid":"https://orcid.org/0000-0001-7743-899X"},"institutions":[{"id":"https://openalex.org/I41455075","display_name":"Universidade Federal de Campina Grande","ror":"https://ror.org/00eftnx64","country_code":"BR","type":"education","lineage":["https://openalex.org/I41455075"]}],"countries":["BR"],"is_corresponding":false,"raw_author_name":"Carlos Eduardo Santos Pires","raw_affiliation_strings":["Academic Unit of Systems and Computing, Federal University of Campina Grande, Campina Grande 58429-900, Brazil"],"raw_orcid":"https://orcid.org/0000-0001-7743-899X","affiliations":[{"raw_affiliation_string":"Academic Unit of Systems and Computing, Federal University of Campina Grande, Campina Grande 58429-900, Brazil","institution_ids":["https://openalex.org/I41455075"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045034530","display_name":"Jyrki Nummenmaa","orcid":"https://orcid.org/0000-0002-7476-7840"},"institutions":[{"id":"https://openalex.org/I166825849","display_name":"Tampere University","ror":"https://ror.org/033003e23","country_code":"FI","type":"education","lineage":["https://openalex.org/I166825849"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Jyrki Nummenmaa","raw_affiliation_strings":["Faculty of Information Technology and Communication Sciences, Tampere University, 33100 Tampere, Finland"],"raw_orcid":"https://orcid.org/0000-0002-7476-7840","affiliations":[{"raw_affiliation_string":"Faculty of Information Technology and Communication Sciences, Tampere University, 33100 Tampere, Finland","institution_ids":["https://openalex.org/I166825849"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5088926017","display_name":"Thiago N\u00f3brega","orcid":"https://orcid.org/0000-0001-7532-2109"},"institutions":[{"id":"https://openalex.org/I41455075","display_name":"Universidade Federal de Campina Grande","ror":"https://ror.org/00eftnx64","country_code":"BR","type":"education","lineage":["https://openalex.org/I41455075"]}],"countries":["BR"],"is_corresponding":false,"raw_author_name":"Thiago Pereira da N\u00f3brega","raw_affiliation_strings":["Academic Unit of Systems and Computing, Federal University of Campina Grande, Campina Grande 58429-900, Brazil"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Academic Unit of Systems and Computing, Federal University of Campina Grande, Campina Grande 58429-900, Brazil","institution_ids":["https://openalex.org/I41455075"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5040142167"],"corresponding_institution_ids":["https://openalex.org/I166825849","https://openalex.org/I3018006580","https://openalex.org/I41455075","https://openalex.org/I4210112416"],"apc_list":{"value":1400,"currency":"CHF","value_usd":1515},"apc_paid":{"value":1400,"currency":"CHF","value_usd":1515},"fwci":1.0132,"has_fulltext":true,"cited_by_count":6,"citation_normalized_percentile":{"value":0.79028174,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":97},"biblio":{"volume":"13","issue":"12","first_page":"568","last_page":"568"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9955999851226807,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8590377569198608},{"id":"https://openalex.org/keywords/blocking","display_name":"Blocking (statistics)","score":0.7651969194412231},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5034369826316833},{"id":"https://openalex.org/keywords/schema","display_name":"Schema (genetic algorithms)","score":0.5000460147857666},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.4492432773113251},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4215981066226959},{"id":"https://openalex.org/keywords/streaming-data","display_name":"Streaming data","score":0.4141008257865906},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.24964243173599243},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.20763206481933594}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8590377569198608},{"id":"https://openalex.org/C144745244","wikidata":"https://www.wikidata.org/wiki/Q4927286","display_name":"Blocking (statistics)","level":2,"score":0.7651969194412231},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5034369826316833},{"id":"https://openalex.org/C52146309","wikidata":"https://www.wikidata.org/wiki/Q7431116","display_name":"Schema (genetic algorithms)","level":2,"score":0.5000460147857666},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.4492432773113251},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4215981066226959},{"id":"https://openalex.org/C2777611316","wikidata":"https://www.wikidata.org/wiki/Q39045282","display_name":"Streaming data","level":2,"score":0.4141008257865906},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.24964243173599243},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.20763206481933594},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.3390/info13120568","is_oa":true,"landing_page_url":"https://doi.org/10.3390/info13120568","pdf_url":"https://www.mdpi.com/2078-2489/13/12/568/pdf?version=1670504041","source":{"id":"https://openalex.org/S4210219776","display_name":"Information","issn_l":"2078-2489","issn":["2078-2489"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Information","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:b48532fa312b4e64acd2945be14b3dc6","is_oa":true,"landing_page_url":"https://doaj.org/article/b48532fa312b4e64acd2945be14b3dc6","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Information, Vol 13, Iss 12, p 568 (2022)","raw_type":"article"},{"id":"pmh:oai:mdpi.com:/2078-2489/13/12/568/","is_oa":true,"landing_page_url":"https://dx.doi.org/10.3390/info13120568","pdf_url":null,"source":{"id":"https://openalex.org/S4306400947","display_name":"MDPI (MDPI AG)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210097602","host_organization_name":"Multidisciplinary Digital Publishing Institute (Switzerland)","host_organization_lineage":["https://openalex.org/I4210097602"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Information; Volume 13; Issue 12; Pages: 568","raw_type":"Text"},{"id":"pmh:oai:trepo.tuni.fi:10024/223407","is_oa":true,"landing_page_url":"https://trepo.tuni.fi/handle/10024/223407","pdf_url":null,"source":{"id":"https://openalex.org/S7407055260","display_name":"Trepo - Institutional Repository of Tampere University","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.3390/info13120568","is_oa":true,"landing_page_url":"https://doi.org/10.3390/info13120568","pdf_url":"https://www.mdpi.com/2078-2489/13/12/568/pdf?version=1670504041","source":{"id":"https://openalex.org/S4210219776","display_name":"Information","issn_l":"2078-2489","issn":["2078-2489"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Information","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","score":0.5099999904632568,"id":"https://metadata.un.org/sdg/9"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4311630552.pdf","grobid_xml":"https://content.openalex.org/works/W4311630552.grobid-xml"},"referenced_works_count":78,"referenced_works":["https://openalex.org/W575847903","https://openalex.org/W1999534989","https://openalex.org/W2006642610","https://openalex.org/W2017167478","https://openalex.org/W2035916238","https://openalex.org/W2058733945","https://openalex.org/W2077551526","https://openalex.org/W2079649893","https://openalex.org/W2079659743","https://openalex.org/W2088285696","https://openalex.org/W2090836891","https://openalex.org/W2105016630","https://openalex.org/W2107343618","https://openalex.org/W2132658991","https://openalex.org/W2145908616","https://openalex.org/W2152502401","https://openalex.org/W2157023969","https://openalex.org/W2172237199","https://openalex.org/W2208310611","https://openalex.org/W2210065635","https://openalex.org/W2261544779","https://openalex.org/W2297435753","https://openalex.org/W2331975942","https://openalex.org/W2342256727","https://openalex.org/W2344512664","https://openalex.org/W2396182456","https://openalex.org/W2398333000","https://openalex.org/W2406045281","https://openalex.org/W2416915331","https://openalex.org/W2505774719","https://openalex.org/W2535168187","https://openalex.org/W2559870814","https://openalex.org/W2566927129","https://openalex.org/W2566979091","https://openalex.org/W2584112163","https://openalex.org/W2586025740","https://openalex.org/W2594063363","https://openalex.org/W2595177560","https://openalex.org/W2604741280","https://openalex.org/W2612312633","https://openalex.org/W2615445186","https://openalex.org/W2615785180","https://openalex.org/W2750964846","https://openalex.org/W2772790565","https://openalex.org/W2775696413","https://openalex.org/W2780899891","https://openalex.org/W2798080725","https://openalex.org/W2802301063","https://openalex.org/W2890185232","https://openalex.org/W2898403354","https://openalex.org/W2911949487","https://openalex.org/W2912423452","https://openalex.org/W2923968079","https://openalex.org/W2933781133","https://openalex.org/W2943447120","https://openalex.org/W2952460245","https://openalex.org/W2955570220","https://openalex.org/W2963840760","https://openalex.org/W2973226110","https://openalex.org/W2981763226","https://openalex.org/W2998944545","https://openalex.org/W3008440336","https://openalex.org/W3014689699","https://openalex.org/W3041018940","https://openalex.org/W3045854241","https://openalex.org/W3092962901","https://openalex.org/W3098230111","https://openalex.org/W3124119769","https://openalex.org/W4231155644","https://openalex.org/W4236454292","https://openalex.org/W4242744113","https://openalex.org/W4296041332","https://openalex.org/W4300456194","https://openalex.org/W6603581258","https://openalex.org/W6726351712","https://openalex.org/W6731596640","https://openalex.org/W6760885858","https://openalex.org/W7075681456"],"related_works":["https://openalex.org/W2392835431","https://openalex.org/W1965371215","https://openalex.org/W2126932387","https://openalex.org/W1842396145","https://openalex.org/W2353762239","https://openalex.org/W2108990487","https://openalex.org/W2484966135","https://openalex.org/W2015050211","https://openalex.org/W2126435977","https://openalex.org/W2017131087"],"abstract_inverted_index":{"Web":[0],"systems":[1],"have":[2],"become":[3,19],"a":[4,20,84,97,109,142,171],"valuable":[5],"source":[6],"of":[7,41,89,104,111,149,167,202],"semi-structured":[8],"and":[9,45,76,92,126,134,154,179,190],"streaming":[10,72,93],"data.":[11,78],"In":[12,159],"this":[13,139],"sense,":[14],"Entity":[15],"Resolution":[16],"(ER)":[17],"has":[18],"key":[21],"solution":[22],"for":[23],"integrating":[24],"multiple":[25],"data":[26,32,94,151,168,176],"sources":[27],"or":[28],"identifying":[29],"similarities":[30],"between":[31],"items,":[33],"namely":[34],"entities.":[35],"To":[36,79,101],"avoid":[37],"the":[38,42,54,63,102,147,194,200,203,207],"quadratic":[39],"costs":[40],"ER":[43,59],"task":[44],"improve":[46,135],"efficiency,":[47],"blocking":[48,64,86,112,136,157],"techniques":[49,113],"are":[50,67],"usually":[51],"applied.":[52],"Beyond":[53],"traditional":[55],"challenges":[56,69,117],"faced":[57],"by":[58,62,213],"and,":[60],"consequently,":[61],"techniques,":[65],"there":[66,107],"also":[68],"related":[70],"to":[71,130,193],"data,":[73],"incremental":[74],"processing,":[75],"noisy":[77,91,150],"address":[80,115],"them,":[81],"we":[82,163],"propose":[83],"schema-agnostic":[85],"technique":[87,184,196,209],"capable":[88],"handling":[90],"incrementally":[95],"through":[96],"distributed":[98],"computational":[99],"infrastructure.":[100],"best":[103],"our":[105,160],"knowledge,":[106],"is":[108],"lack":[110],"that":[114,174],"these":[116],"simultaneously.":[118],"This":[119],"work":[120,140],"proposes":[121],"two":[122,204],"strategies":[123,205],"(attribute":[124],"selection":[125],"top-n":[127],"neighborhood":[128],"entities)":[129],"minimize":[131],"resource":[132],"consumption":[133],"efficiency.":[137],"Moreover,":[138],"presents":[141],"noise-tolerant":[143],"algorithm,":[144],"which":[145],"minimizes":[146],"impact":[148],"(e.g.,":[152],"typos":[153],"misspellings)":[155],"on":[156,215],"effectiveness.":[158],"experimental":[161],"evaluation,":[162],"use":[164],"real-world":[165],"pairs":[166],"sources,":[169],"including":[170],"case":[172],"study":[173],"involves":[175],"from":[177],"Twitter":[178],"Google":[180],"News.":[181],"The":[182],"proposed":[183,208],"achieves":[185],"better":[186],"results":[187],"regarding":[188],"effectiveness":[189],"efficiency":[191,212],"compared":[192],"state-of-the-art":[195],"(metablocking).":[197],"More":[198],"precisely,":[199],"application":[201],"over":[206],"alone":[210],"improves":[211],"56%,":[214],"average.":[216]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":3}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
