{"id":"https://openalex.org/W2103128401","doi":"https://doi.org/10.1145/1559845.1559881","title":"Optimizing complex extraction programs over evolving text data","display_name":"Optimizing complex extraction programs over evolving text data","publication_year":2009,"publication_date":"2009-06-29","ids":{"openalex":"https://openalex.org/W2103128401","doi":"https://doi.org/10.1145/1559845.1559881","mag":"2103128401"},"language":"en","primary_location":{"id":"doi:10.1145/1559845.1559881","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1559845.1559881","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2009 ACM SIGMOD International Conference on Management of data","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100405406","display_name":"Fei Chen","orcid":"https://orcid.org/0000-0002-6381-623X"},"institutions":[{"id":"https://openalex.org/I135310074","display_name":"University of Wisconsin\u2013Madison","ror":"https://ror.org/01y2jtd41","country_code":"US","type":"education","lineage":["https://openalex.org/I135310074"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Fei Chen","raw_affiliation_strings":["University of Wisconsin-Madison, Madison, WI, USA"],"affiliations":[{"raw_affiliation_string":"University of Wisconsin-Madison, Madison, WI, USA","institution_ids":["https://openalex.org/I135310074"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110667925","display_name":"Byron J. Gao","orcid":null},"institutions":[{"id":"https://openalex.org/I13511017","display_name":"Texas State University","ror":"https://ror.org/05h9q1g27","country_code":"US","type":"education","lineage":["https://openalex.org/I13511017"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Byron J. Gao","raw_affiliation_strings":["Texas State University-San Marcos, San Marcos, TX, USA","Texas State University-San Marcos, San Marcos, TX, USA#TAB#"],"affiliations":[{"raw_affiliation_string":"Texas State University-San Marcos, San Marcos, TX, USA","institution_ids":["https://openalex.org/I13511017"]},{"raw_affiliation_string":"Texas State University-San Marcos, San Marcos, TX, USA#TAB#","institution_ids":["https://openalex.org/I13511017"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110256670","display_name":"AnHai Doan","orcid":null},"institutions":[{"id":"https://openalex.org/I135310074","display_name":"University of Wisconsin\u2013Madison","ror":"https://ror.org/01y2jtd41","country_code":"US","type":"education","lineage":["https://openalex.org/I135310074"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"AnHai Doan","raw_affiliation_strings":["University of Wisconsin-Madison, Madison, WI, USA"],"affiliations":[{"raw_affiliation_string":"University of Wisconsin-Madison, Madison, WI, USA","institution_ids":["https://openalex.org/I135310074"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101801967","display_name":"Jun Yang","orcid":"https://orcid.org/0000-0002-4901-8530"},"institutions":[{"id":"https://openalex.org/I170897317","display_name":"Duke University","ror":"https://ror.org/00py81415","country_code":"US","type":"education","lineage":["https://openalex.org/I170897317"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jun Yang","raw_affiliation_strings":["Duke University, Durham, NC, USA"],"affiliations":[{"raw_affiliation_string":"Duke University, Durham, NC, USA","institution_ids":["https://openalex.org/I170897317"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5051301731","display_name":"Raghu Ramakrishnan","orcid":"https://orcid.org/0009-0007-5086-7664"},"institutions":[{"id":"https://openalex.org/I4210134091","display_name":"Yahoo (United States)","ror":"https://ror.org/040dkzz12","country_code":"US","type":"company","lineage":["https://openalex.org/I4210134091"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Raghu Ramakrishnan","raw_affiliation_strings":["Yahoo! Research, Santa Clara, CA, USA","Yahoo! Research, Santa Clara , CA, USA"],"affiliations":[{"raw_affiliation_string":"Yahoo! Research, Santa Clara, CA, USA","institution_ids":["https://openalex.org/I4210134091"]},{"raw_affiliation_string":"Yahoo! Research, Santa Clara , CA, USA","institution_ids":["https://openalex.org/I4210134091"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100405406"],"corresponding_institution_ids":["https://openalex.org/I135310074"],"apc_list":null,"apc_paid":null,"fwci":5.1821,"has_fulltext":false,"cited_by_count":14,"citation_normalized_percentile":{"value":0.95715024,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"321","last_page":"334"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.994700014591217,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9943000078201294,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8733383417129517},{"id":"https://openalex.org/keywords/snapshot","display_name":"Snapshot (computer storage)","score":0.7566186189651489},{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.747830867767334},{"id":"https://openalex.org/keywords/scratch","display_name":"Scratch","score":0.7249666452407837},{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.5231791138648987},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3946428894996643},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.39281439781188965},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.36938729882240295},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.364971399307251},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.31187719106674194},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.23212620615959167}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8733383417129517},{"id":"https://openalex.org/C55282118","wikidata":"https://www.wikidata.org/wiki/Q252683","display_name":"Snapshot (computer storage)","level":2,"score":0.7566186189651489},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.747830867767334},{"id":"https://openalex.org/C2781235140","wikidata":"https://www.wikidata.org/wiki/Q275131","display_name":"Scratch","level":2,"score":0.7249666452407837},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.5231791138648987},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3946428894996643},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.39281439781188965},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.36938729882240295},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.364971399307251},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.31187719106674194},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.23212620615959167}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/1559845.1559881","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1559845.1559881","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2009 ACM SIGMOD International Conference on Management of data","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W4508078","https://openalex.org/W10197252","https://openalex.org/W158294556","https://openalex.org/W1483194439","https://openalex.org/W1562880585","https://openalex.org/W1595917421","https://openalex.org/W1598467895","https://openalex.org/W1965685479","https://openalex.org/W2006149654","https://openalex.org/W2018928332","https://openalex.org/W2022760666","https://openalex.org/W2030530147","https://openalex.org/W2035015991","https://openalex.org/W2035266017","https://openalex.org/W2038378248","https://openalex.org/W2095733021","https://openalex.org/W2096797897","https://openalex.org/W2103128401","https://openalex.org/W2103224511","https://openalex.org/W2124943418","https://openalex.org/W2127711252","https://openalex.org/W2130200371","https://openalex.org/W2134145495","https://openalex.org/W2144358319","https://openalex.org/W2144416276","https://openalex.org/W2146304342","https://openalex.org/W2157022538","https://openalex.org/W2168943018","https://openalex.org/W2170907470","https://openalex.org/W2913389685"],"related_works":["https://openalex.org/W2475116013","https://openalex.org/W2066741154","https://openalex.org/W2770018148","https://openalex.org/W2358308169","https://openalex.org/W2385135707","https://openalex.org/W2140315382","https://openalex.org/W2059109728","https://openalex.org/W2082556335","https://openalex.org/W322691623","https://openalex.org/W2494989134"],"abstract_inverted_index":{"Most":[0],"information":[1,34],"extraction":[2],"(IE)":[3],"approaches":[4],"have":[5,66],"considered":[6],"only":[7,16,104],"static":[8],"text":[9,20],"corpora,":[10],"over":[11,27,81],"which":[12],"we":[13,38,65],"apply":[14,41],"IE":[15,42,49,75,80,105,111,116,124],"once.":[17],"Many":[18],"real-world":[19],"corpora":[21],"however":[22,97],"are":[23,118],"dynamic.":[24],"They":[25],"evolve":[26],"time,":[28],"and":[29],"so":[30],"to":[31,36,44,52,77],"keep":[32],"extracted":[33],"up":[35,79],"date":[37],"often":[39],"must":[40],"repeatedly,":[43],"consecutive":[45],"corpus":[46,83],"snapshots.":[47,84],"Applying":[48],"from":[50],"scratch":[51],"each":[53],"snapshot":[54],"can":[55],"take":[56],"a":[57,70,109,128],"lot":[58],"of":[59,90],"time.":[60],"To":[61],"avoid":[62],"doing":[63],"this,":[64],"recently":[67],"developed":[68],"Cyclex,":[69],"system":[71],"that":[72,101,107],"recycles":[73],"previous":[74],"results":[76],"speed":[78],"subsequent":[82],"Cyclex":[85],"clearly":[86],"demonstrated":[87],"the":[88,91],"promise":[89],"recycling":[92],"idea.":[93],"The":[94],"work":[95],"itself":[96],"is":[98],"limited":[99],"in":[100,127],"it":[102],"considers":[103],"programs":[106,117],"contain":[108],"single":[110],"``blackbox.''":[112],"In":[113],"practice,":[114],"many":[115],"far":[119],"more":[120],"complex,":[121],"containing":[122],"multiple":[123],"blackboxes":[125],"connected":[126],"compositional":[129],"``workflow.''":[130]},"counts_by_year":[{"year":2023,"cited_by_count":2},{"year":2020,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2013,"cited_by_count":3},{"year":2012,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
