{"id":"https://openalex.org/W2116548113","doi":"https://doi.org/10.1145/2594538.2594540","title":"Cleaning inconsistencies in information extraction via prioritized repairs","display_name":"Cleaning inconsistencies in information extraction via prioritized repairs","publication_year":2014,"publication_date":"2014-06-18","ids":{"openalex":"https://openalex.org/W2116548113","doi":"https://doi.org/10.1145/2594538.2594540","mag":"2116548113"},"language":"en","primary_location":{"id":"doi:10.1145/2594538.2594540","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2594538.2594540","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM SIGMOD-SIGACT-SIGART symposium on Principles of database systems","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5027117465","display_name":"Ronald Fagin","orcid":"https://orcid.org/0000-0002-7374-0347"},"institutions":[{"id":"https://openalex.org/I4210085935","display_name":"IBM Research - Almaden","ror":"https://ror.org/005w8dd04","country_code":"US","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210085935","https://openalex.org/I4210114115"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Ronald Fagin","raw_affiliation_strings":["IBM Research -- Almaden, San Jose, CA, USA"],"affiliations":[{"raw_affiliation_string":"IBM Research -- Almaden, San Jose, CA, USA","institution_ids":["https://openalex.org/I4210085935"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006706357","display_name":"Benny Kimelfeld","orcid":"https://orcid.org/0000-0002-7156-1572"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Benny Kimelfeld","raw_affiliation_strings":["LogicBlox, Inc., Berkeley, CA, USA"],"affiliations":[{"raw_affiliation_string":"LogicBlox, Inc., Berkeley, CA, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074416632","display_name":"Frederick Reiss","orcid":"https://orcid.org/0009-0002-1254-1798"},"institutions":[{"id":"https://openalex.org/I4210085935","display_name":"IBM Research - Almaden","ror":"https://ror.org/005w8dd04","country_code":"US","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210085935","https://openalex.org/I4210114115"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Frederick Reiss","raw_affiliation_strings":["IBM Research -- Almaden, San Jose, CA, USA"],"affiliations":[{"raw_affiliation_string":"IBM Research -- Almaden, San Jose, CA, USA","institution_ids":["https://openalex.org/I4210085935"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5022558461","display_name":"Stijn Vansummeren","orcid":"https://orcid.org/0000-0001-7793-9049"},"institutions":[{"id":"https://openalex.org/I132053463","display_name":"Universit\u00e9 Libre de Bruxelles","ror":"https://ror.org/01r9htc13","country_code":"BE","type":"education","lineage":["https://openalex.org/I132053463"]}],"countries":["BE"],"is_corresponding":false,"raw_author_name":"Stijn Vansummeren","raw_affiliation_strings":["Universit\u00e9 Libre de Bruxelles (ULB), Bruxelles, Belgium"],"affiliations":[{"raw_affiliation_string":"Universit\u00e9 Libre de Bruxelles (ULB), Bruxelles, Belgium","institution_ids":["https://openalex.org/I132053463"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5027117465"],"corresponding_institution_ids":["https://openalex.org/I4210085935"],"apc_list":null,"apc_paid":null,"fwci":5.0445,"has_fulltext":false,"cited_by_count":21,"citation_normalized_percentile":{"value":0.95199523,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"164","last_page":"175"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9926999807357788,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8238085508346558},{"id":"https://openalex.org/keywords/atomicity","display_name":"Atomicity","score":0.4882817268371582},{"id":"https://openalex.org/keywords/schema","display_name":"Schema (genetic algorithms)","score":0.4848805367946625},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.47038733959198},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.45158112049102783},{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.4290992319583893},{"id":"https://openalex.org/keywords/coding","display_name":"Coding (social sciences)","score":0.41519707441329956},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.33019906282424927},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.30045565962791443},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.26380589604377747},{"id":"https://openalex.org/keywords/database-transaction","display_name":"Database transaction","score":0.15811264514923096}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8238085508346558},{"id":"https://openalex.org/C188045909","wikidata":"https://www.wikidata.org/wiki/Q3306359","display_name":"Atomicity","level":3,"score":0.4882817268371582},{"id":"https://openalex.org/C52146309","wikidata":"https://www.wikidata.org/wiki/Q7431116","display_name":"Schema (genetic algorithms)","level":2,"score":0.4848805367946625},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.47038733959198},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.45158112049102783},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.4290992319583893},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.41519707441329956},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.33019906282424927},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.30045565962791443},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.26380589604377747},{"id":"https://openalex.org/C75949130","wikidata":"https://www.wikidata.org/wiki/Q848010","display_name":"Database transaction","level":2,"score":0.15811264514923096},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.1145/2594538.2594540","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2594538.2594540","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM SIGMOD-SIGACT-SIGART symposium on Principles of database systems","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.648.7601","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.648.7601","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://researcher.watson.ibm.com/researcher/files/us-fagin/pods14.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.719.6605","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.719.6605","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://benny.net.technion.ac.il/files/2015/02/pods14-inconsistency.pdf","raw_type":"text"},{"id":"pmh:oai:dipot.ulb.ac.be:2013/172358","is_oa":false,"landing_page_url":"http://hdl.handle.net/2013/ULB-DIPOT:oai:dipot.ulb.ac.be:2013/172358","pdf_url":null,"source":{"id":"https://openalex.org/S4306401063","display_name":"D\u00e9p\u00f4t institutionnel de l'Universit\u00e9 libre de Bruxelles (Universit\u00e9 Libre de Bruxelles)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I132053463","host_organization_name":"Universit\u00e9 Libre de Bruxelles","host_organization_lineage":["https://openalex.org/I132053463"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"In: Proceedings of the 33rd ACM SIGMOD-SIGACT-SIGART Symposium on Principles of Database Systems, PODS'14,","raw_type":"info:ulb-repo/semantics/openurl/proceeding"},{"id":"pmh:oai:documentserver.uhasselt.be:1942/33423","is_oa":false,"landing_page_url":"http://hdl.handle.net/1942/33423","pdf_url":null,"source":{"id":"https://openalex.org/S4306401926","display_name":"Document Server@UHasselt (UHasselt)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I878454856","host_organization_name":"Hasselt University","host_organization_lineage":["https://openalex.org/I878454856"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/conferenceObject"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":38,"referenced_works":["https://openalex.org/W133886465","https://openalex.org/W1502749598","https://openalex.org/W1506336181","https://openalex.org/W1563635790","https://openalex.org/W1599188306","https://openalex.org/W1607983314","https://openalex.org/W1934019294","https://openalex.org/W1967693793","https://openalex.org/W1969595233","https://openalex.org/W1979960035","https://openalex.org/W1986891954","https://openalex.org/W1999563429","https://openalex.org/W2020230082","https://openalex.org/W2035266017","https://openalex.org/W2059009730","https://openalex.org/W2068882115","https://openalex.org/W2077518845","https://openalex.org/W2080480757","https://openalex.org/W2080803375","https://openalex.org/W2096797897","https://openalex.org/W2098720114","https://openalex.org/W2102632804","https://openalex.org/W2122112897","https://openalex.org/W2133973199","https://openalex.org/W2135209143","https://openalex.org/W2144416276","https://openalex.org/W2147805208","https://openalex.org/W2147880316","https://openalex.org/W2152322100","https://openalex.org/W2152336115","https://openalex.org/W2159636537","https://openalex.org/W2164949130","https://openalex.org/W2169940602","https://openalex.org/W2185669627","https://openalex.org/W2201604718","https://openalex.org/W2295240344","https://openalex.org/W2569536984","https://openalex.org/W4285719527"],"related_works":["https://openalex.org/W1591836442","https://openalex.org/W2357517130","https://openalex.org/W1990697576","https://openalex.org/W1520405624","https://openalex.org/W1582361381","https://openalex.org/W2115269980","https://openalex.org/W1600422503","https://openalex.org/W4303183859","https://openalex.org/W2001866549","https://openalex.org/W2372888775"],"abstract_inverted_index":{"The":[0],"population":[1],"of":[2,81,100,160,170,176,182,190,199,210,246,268,280,286],"a":[3,17,62,73,97,171,249,258],"predefined":[4],"relational":[5],"schema":[6],"from":[7,106],"textual":[8,30],"content,":[9],"commonly":[10],"known":[11],"as":[12,68,207,231,233],"Information":[13],"Extraction":[14],"(IE),":[15],"is":[16,46,79,147,252],"pervasive":[18],"task":[19],"in":[20,34,84,114,140,178,257,292],"contemporary":[21],"computational":[22],"challenges":[23],"associated":[24],"with":[25,77],"Big":[26],"Data.":[27],"Since":[28],"the":[29,55,153,168,188,197,211,227,234,244,265,269],"content":[31],"varies":[32],"widely":[33],"nature":[35],"and":[36,72,92,261,276,284],"structure":[37],"(from":[38],"machine":[39],"logs":[40],"to":[41,49,103,125,129,151,215,289],"informal":[42],"natural":[43],"language),":[44],"it":[45,146,263],"notoriously":[47],"difficult":[48],"write":[50],"IE":[51,85,88,127],"programs":[52],"that":[53,223],"extract":[54],"sought":[56],"information":[57],"without":[58,156],"any":[59],"inconsistencies":[60,78,105,177],"(e.g.,":[61],"substring":[63],"should":[64],"not":[65,148],"be":[66],"annotated":[67],"both":[69,274],"an":[70,115,141,208],"address":[71],"person":[74],"name).":[75],"Dealing":[76],"hence":[80],"crucial":[82],"importance":[83],"systems.":[86],"Industrial-strength":[87],"systems":[89],"like":[90,124],"GATE":[91],"IBM":[93],"SystemT":[94],"therefore":[95],"provide":[96],"built-in":[98,154],"collection":[99],"cleaning":[101,136,175,229,250],"operations":[102,137,155],"remove":[104],"extracted":[107],"relations.":[108],"These":[109],"operations,":[110],"however,":[111],"are":[112,138,282],"collected":[113],"ad-hoc":[116],"fashion":[117],"through":[118,239],"use":[119],"cases.":[120],"Ideally,":[121],"we":[122,195],"would":[123],"allow":[126],"developers":[128],"declare":[130],"their":[131],"own":[132],"policies.":[133],"But":[134],"existing":[135],"defined":[139],"algorithmic":[142],"way":[143],"and,":[144],"hence,":[145],"clear":[149],"how":[150],"extend":[152],"requiring":[157],"low-level":[158],"coding":[159],"internal":[161],"or":[162],"external":[163],"functions.":[164],"We":[165,221,242,272],"embark":[166],"on":[167],"establishment":[169],"framework":[172,225],"for":[173,193,237],"declarative":[174],"IE,":[179,194],"though":[180],"principles":[181],"database":[183,213],"theory.":[184],"Specifically,":[185],"building":[186],"upon":[187],"formalism":[189],"document":[191],"spanners":[192],"adopt":[196],"concept":[198],"prioritized":[200],"repairs,":[201],"which":[202,281,287],"has":[203],"been":[204],"recently":[205],"proposed":[206],"extension":[209],"traditional":[212],"repairs":[214],"incorporate":[216],"priorities":[217],"among":[218],"conflicting":[219],"facts.":[220],"show":[222],"our":[224],"captures":[226],"popular":[228],"policies,":[230],"well":[232],"POSIX":[235],"semantics":[236],"extraction":[238,270],"regular":[240],"expressions.":[241],"explore":[243],"problem":[245],"determining":[247],"whether":[248,262],"declaration":[251],"unambiguous":[253],"(i.e.,":[254],"always":[255],"results":[256],"single":[259],"repair),":[260],"increases":[264],"expressive":[266],"power":[267],"language.":[271],"give":[273],"positive":[275],"negative":[277],"results,":[278],"some":[279,285],"general,":[283],"apply":[288],"policies":[290],"used":[291],"practice.":[293]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2020,"cited_by_count":4},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":6},{"year":2015,"cited_by_count":3},{"year":2014,"cited_by_count":2}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
