{"id":"https://openalex.org/W2213275763","doi":"https://doi.org/10.14778/2856318.2856328","title":"Temporal rules discovery for web data cleaning","display_name":"Temporal rules discovery for web data cleaning","publication_year":2015,"publication_date":"2015-12-01","ids":{"openalex":"https://openalex.org/W2213275763","doi":"https://doi.org/10.14778/2856318.2856328","mag":"2213275763"},"language":"en","primary_location":{"id":"doi:10.14778/2856318.2856328","is_oa":false,"landing_page_url":"https://doi.org/10.14778/2856318.2856328","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5009128577","display_name":"Ziawasch Abedjan","orcid":"https://orcid.org/0000-0002-2846-1373"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Ziawasch Abedjan","raw_affiliation_strings":["MIT CSAIL"],"affiliations":[{"raw_affiliation_string":"MIT CSAIL","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045418504","display_name":"C\u00fcneyt G\u00fcrcan Ak\u00e7ora","orcid":"https://orcid.org/0000-0002-2882-6950"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cuneyt G. Akcora","raw_affiliation_strings":["Qatar Computing Research Institute, HBKU"],"affiliations":[{"raw_affiliation_string":"Qatar Computing Research Institute, HBKU","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026923999","display_name":"Mourad Ouzzani","orcid":"https://orcid.org/0000-0002-4035-3025"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mourad Ouzzani","raw_affiliation_strings":["Qatar Computing Research Institute, HBKU"],"affiliations":[{"raw_affiliation_string":"Qatar Computing Research Institute, HBKU","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011336242","display_name":"Paolo Papotti","orcid":"https://orcid.org/0000-0003-0651-4128"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Paolo Papotti","raw_affiliation_strings":["Qatar Computing Research Institute, HBKU"],"affiliations":[{"raw_affiliation_string":"Qatar Computing Research Institute, HBKU","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5074724644","display_name":"Michael Stonebraker","orcid":"https://orcid.org/0000-0001-9184-9058"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Michael Stonebraker","raw_affiliation_strings":["MIT CSAIL"],"affiliations":[{"raw_affiliation_string":"MIT CSAIL","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5009128577"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":8.4129,"has_fulltext":false,"cited_by_count":53,"citation_normalized_percentile":{"value":0.97563848,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"9","issue":"4","first_page":"336","last_page":"347"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9937999844551086,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9857000112533569,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7527328729629517},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.6740896701812744},{"id":"https://openalex.org/keywords/anomaly-detection","display_name":"Anomaly detection","score":0.6602821350097656},{"id":"https://openalex.org/keywords/outlier","display_name":"Outlier","score":0.6390464901924133},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5935073494911194},{"id":"https://openalex.org/keywords/knowledge-extraction","display_name":"Knowledge extraction","score":0.5816226601600647},{"id":"https://openalex.org/keywords/data-cleansing","display_name":"Data cleansing","score":0.5663115382194519},{"id":"https://openalex.org/keywords/association-rule-learning","display_name":"Association rule learning","score":0.547435462474823},{"id":"https://openalex.org/keywords/dimension","display_name":"Dimension (graph theory)","score":0.543631374835968},{"id":"https://openalex.org/keywords/process-mining","display_name":"Process mining","score":0.4774661362171173},{"id":"https://openalex.org/keywords/business-process-discovery","display_name":"Business process discovery","score":0.4582156538963318},{"id":"https://openalex.org/keywords/k-optimal-pattern-discovery","display_name":"K-optimal pattern discovery","score":0.4441983997821808},{"id":"https://openalex.org/keywords/data-quality","display_name":"Data quality","score":0.430853009223938},{"id":"https://openalex.org/keywords/data-discovery","display_name":"Data discovery","score":0.42721492052078247},{"id":"https://openalex.org/keywords/duration","display_name":"Duration (music)","score":0.42007166147232056},{"id":"https://openalex.org/keywords/conformance-checking","display_name":"Conformance checking","score":0.4110780954360962},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.35751691460609436},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2888336777687073},{"id":"https://openalex.org/keywords/work-in-process","display_name":"Work in process","score":0.22351506352424622},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.11367109417915344},{"id":"https://openalex.org/keywords/business-process","display_name":"Business process","score":0.10840535163879395},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.0997367799282074},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.09062927961349487}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7527328729629517},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.6740896701812744},{"id":"https://openalex.org/C739882","wikidata":"https://www.wikidata.org/wiki/Q3560506","display_name":"Anomaly detection","level":2,"score":0.6602821350097656},{"id":"https://openalex.org/C79337645","wikidata":"https://www.wikidata.org/wiki/Q779824","display_name":"Outlier","level":2,"score":0.6390464901924133},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5935073494911194},{"id":"https://openalex.org/C120567893","wikidata":"https://www.wikidata.org/wiki/Q1582085","display_name":"Knowledge extraction","level":2,"score":0.5816226601600647},{"id":"https://openalex.org/C42199009","wikidata":"https://www.wikidata.org/wiki/Q1172378","display_name":"Data cleansing","level":4,"score":0.5663115382194519},{"id":"https://openalex.org/C193524817","wikidata":"https://www.wikidata.org/wiki/Q386780","display_name":"Association rule learning","level":2,"score":0.547435462474823},{"id":"https://openalex.org/C33676613","wikidata":"https://www.wikidata.org/wiki/Q13415176","display_name":"Dimension (graph theory)","level":2,"score":0.543631374835968},{"id":"https://openalex.org/C124670913","wikidata":"https://www.wikidata.org/wiki/Q2608526","display_name":"Process mining","level":5,"score":0.4774661362171173},{"id":"https://openalex.org/C93453677","wikidata":"https://www.wikidata.org/wiki/Q1017580","display_name":"Business process discovery","level":5,"score":0.4582156538963318},{"id":"https://openalex.org/C105445830","wikidata":"https://www.wikidata.org/wiki/Q6322855","display_name":"K-optimal pattern discovery","level":3,"score":0.4441983997821808},{"id":"https://openalex.org/C24756922","wikidata":"https://www.wikidata.org/wiki/Q1757694","display_name":"Data quality","level":3,"score":0.430853009223938},{"id":"https://openalex.org/C2777516300","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data discovery","level":3,"score":0.42721492052078247},{"id":"https://openalex.org/C112758219","wikidata":"https://www.wikidata.org/wiki/Q16038819","display_name":"Duration (music)","level":2,"score":0.42007166147232056},{"id":"https://openalex.org/C2775948798","wikidata":"https://www.wikidata.org/wiki/Q5160261","display_name":"Conformance checking","level":5,"score":0.4110780954360962},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.35751691460609436},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2888336777687073},{"id":"https://openalex.org/C174998907","wikidata":"https://www.wikidata.org/wiki/Q357662","display_name":"Work in process","level":2,"score":0.22351506352424622},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.11367109417915344},{"id":"https://openalex.org/C85345410","wikidata":"https://www.wikidata.org/wiki/Q851587","display_name":"Business process","level":3,"score":0.10840535163879395},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.0997367799282074},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.09062927961349487},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C207505557","wikidata":"https://www.wikidata.org/wiki/Q4374012","display_name":"Business process modeling","level":4,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.0},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.0},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C124952713","wikidata":"https://www.wikidata.org/wiki/Q8242","display_name":"Literature","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.14778/2856318.2856328","is_oa":false,"landing_page_url":"https://doi.org/10.14778/2856318.2856328","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","score":0.4300000071525574,"display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W149565845","https://openalex.org/W1502954553","https://openalex.org/W1521736627","https://openalex.org/W1565102206","https://openalex.org/W1966392338","https://openalex.org/W1967167578","https://openalex.org/W1976732638","https://openalex.org/W1981457167","https://openalex.org/W1994962776","https://openalex.org/W2013214811","https://openalex.org/W2024443169","https://openalex.org/W2042454345","https://openalex.org/W2044469685","https://openalex.org/W2046298800","https://openalex.org/W2047745978","https://openalex.org/W2079223746","https://openalex.org/W2081186682","https://openalex.org/W2093931624","https://openalex.org/W2097874932","https://openalex.org/W2102489964","https://openalex.org/W2112840274","https://openalex.org/W2113607096","https://openalex.org/W2122323423","https://openalex.org/W2153531471","https://openalex.org/W2159296364","https://openalex.org/W2166549982","https://openalex.org/W2167333415","https://openalex.org/W2169585110","https://openalex.org/W2294688454","https://openalex.org/W2296063924","https://openalex.org/W2803437449","https://openalex.org/W4211139099"],"related_works":["https://openalex.org/W2803918818","https://openalex.org/W794897257","https://openalex.org/W2953829836","https://openalex.org/W3141597438","https://openalex.org/W2035276016","https://openalex.org/W2857149938","https://openalex.org/W3172074448","https://openalex.org/W2336391106","https://openalex.org/W4386315412","https://openalex.org/W93016489"],"abstract_inverted_index":{"Declarative":[0],"rules,":[1,36,178],"such":[2,61,166],"as":[3,16,62,167],"functional":[4],"dependencies,":[5],"are":[6,73,119],"widely":[7],"used":[8],"for":[9,18,100,173],"cleaning":[10,86,229],"data.":[11,28,103],"Several":[12],"systems":[13],"take":[14],"them":[15],"input":[17],"detecting":[19],"errors":[20,133],"and":[21,46,75,128,170,235],"computing":[22],"a":[23,68,105,150,236],"\"clean\"":[24],"version":[25],"of":[26,81,111,114,138,176,184,217,223],"the":[27,44,56,82,85,96,112,135,174,177,185,188,215,218,224,228,241],"To":[29],"support":[30],"domain":[31],"experts,":[32],"in":[33,65,70,187,227,240],"specifying":[34],"these":[35,147],"several":[37],"tools":[38],"have":[39,53,67],"been":[40],"proposed":[41],"to":[42,158,233],"profile":[43],"data":[45,186,197,219],"mine":[47],"rules.":[48],"However,":[49],"existing":[50],"discovery":[51,98,106,152,175],"techniques":[52],"traditionally":[54],"ignored":[55],"time":[57],"dimension.":[58],"Recurrent":[59],"events,":[60],"persons":[63],"reported":[64,125,131],"locations,":[66],"duration":[69,77],"which":[71],"they":[72],"valid,":[74],"this":[76,92],"should":[78],"be":[79],"part":[80],"rules":[83,213],"or":[84,141],"process":[87,107,230],"would":[88],"simply":[89],"fail.":[90],"In":[91],"work,":[93],"we":[94],"study":[95],"rule":[97],"problem":[99],"temporal":[101,212],"web":[102,115],"Such":[104],"is":[108,155],"challenging":[109],"because":[110,137],"nature":[113],"data;":[116],"extracted":[117],"facts":[118],"(i)":[120],"sparse":[121],"over":[122,134,195,206],"time,":[123],"(ii)":[124],"with":[126,132,149,180,220],"delays,":[127],"(iii)":[129],"often":[130],"values":[136],"inaccurate":[139],"sources":[140],"non":[142],"robust":[143,157],"extractors.":[144],"We":[145],"handle":[146],"challenges":[148],"new":[151],"approach":[153],"that":[154,204,211],"more":[156],"noise.":[159],"Our":[160,192],"solution":[161],"uses":[162],"machine":[163],"learning":[164],"methods,":[165],"association":[168],"measures":[169],"outlier":[171],"detection,":[172],"together":[179],"an":[181,201,221],"aggressive":[182],"repair":[183],"mining":[189],"step":[190],"itself.":[191],"experimental":[193],"evaluation":[194],"real-world":[196],"from":[198,231],"Recorded":[199],"Future,":[200],"intelligence":[202],"company":[203],"monitors":[205],"700K":[207],"Web":[208],"sources,":[209],"shows":[210],"improve":[214],"quality":[216],"increase":[222,239],"average":[225,242],"precision":[226],"0.37":[232],"0.84,":[234],"40%":[237],"relative":[238],"F-measure.":[243]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":5},{"year":2020,"cited_by_count":11},{"year":2019,"cited_by_count":5},{"year":2018,"cited_by_count":6},{"year":2017,"cited_by_count":8},{"year":2016,"cited_by_count":7}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
