{"id":"https://openalex.org/W2095422717","doi":"https://doi.org/10.1145/2063576.2063935","title":"Constructing efficient information extraction pipelines","display_name":"Constructing efficient information extraction pipelines","publication_year":2011,"publication_date":"2011-10-24","ids":{"openalex":"https://openalex.org/W2095422717","doi":"https://doi.org/10.1145/2063576.2063935","mag":"2095422717"},"language":"en","primary_location":{"id":"doi:10.1145/2063576.2063935","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2063576.2063935","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 20th ACM international conference on Information and knowledge management","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5014375244","display_name":"Henning Wachsmuth","orcid":"https://orcid.org/0000-0003-2792-621X"},"institutions":[{"id":"https://openalex.org/I206945453","display_name":"Paderborn University","ror":"https://ror.org/058kzsd48","country_code":"DE","type":"education","lineage":["https://openalex.org/I206945453"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Henning Wachsmuth","raw_affiliation_strings":["Universit\u00e4t Paderborn, s-lab, Paderborn, Germany"],"affiliations":[{"raw_affiliation_string":"Universit\u00e4t Paderborn, s-lab, Paderborn, Germany","institution_ids":["https://openalex.org/I206945453"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027915931","display_name":"Benno Stein","orcid":"https://orcid.org/0000-0001-9033-2217"},"institutions":[{"id":"https://openalex.org/I51441396","display_name":"Bauhaus-Universit\u00e4t Weimar","ror":"https://ror.org/033bb5z47","country_code":"DE","type":"education","lineage":["https://openalex.org/I51441396"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Benno Stein","raw_affiliation_strings":["Bauhaus-Universit\u00e4t Weimar, Weimar, Germany"],"affiliations":[{"raw_affiliation_string":"Bauhaus-Universit\u00e4t Weimar, Weimar, Germany","institution_ids":["https://openalex.org/I51441396"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5022823882","display_name":"Gregor Engels","orcid":"https://orcid.org/0000-0001-5397-9548"},"institutions":[{"id":"https://openalex.org/I206945453","display_name":"Paderborn University","ror":"https://ror.org/058kzsd48","country_code":"DE","type":"education","lineage":["https://openalex.org/I206945453"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Gregor Engels","raw_affiliation_strings":["Universit\u00e4t Paderborn, s-lab, Paderborn, Germany"],"affiliations":[{"raw_affiliation_string":"Universit\u00e4t Paderborn, s-lab, Paderborn, Germany","institution_ids":["https://openalex.org/I206945453"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5014375244"],"corresponding_institution_ids":["https://openalex.org/I206945453"],"apc_list":null,"apc_paid":null,"fwci":2.8006,"has_fulltext":false,"cited_by_count":12,"citation_normalized_percentile":{"value":0.9084313,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":"28","issue":null,"first_page":"2237","last_page":"2240"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9951000213623047,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9951000213623047,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9901999831199646,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9901000261306763,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pipeline-transport","display_name":"Pipeline transport","score":0.918174147605896},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7660732269287109},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.758582592010498},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5897414684295654},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.576071560382843},{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.5397257804870605},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.529272198677063},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.46726861596107483},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4663495421409607},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.200152188539505},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.18148425221443176},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.13406530022621155},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.08008277416229248},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.06123536825180054}],"concepts":[{"id":"https://openalex.org/C175309249","wikidata":"https://www.wikidata.org/wiki/Q725864","display_name":"Pipeline transport","level":2,"score":0.918174147605896},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7660732269287109},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.758582592010498},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5897414684295654},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.576071560382843},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.5397257804870605},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.529272198677063},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.46726861596107483},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4663495421409607},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.200152188539505},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.18148425221443176},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.13406530022621155},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.08008277416229248},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.06123536825180054},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C87717796","wikidata":"https://www.wikidata.org/wiki/Q146326","display_name":"Environmental engineering","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2063576.2063935","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2063576.2063935","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 20th ACM international conference on Information and knowledge management","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":16,"referenced_works":["https://openalex.org/W57058802","https://openalex.org/W1522263329","https://openalex.org/W1594297443","https://openalex.org/W2004763266","https://openalex.org/W2004972423","https://openalex.org/W2060565333","https://openalex.org/W2087787226","https://openalex.org/W2096891167","https://openalex.org/W2099162170","https://openalex.org/W2106310992","https://openalex.org/W2118584553","https://openalex.org/W2121856526","https://openalex.org/W2139403546","https://openalex.org/W2150114207","https://openalex.org/W2413158316","https://openalex.org/W4285719527"],"related_works":["https://openalex.org/W4380433113","https://openalex.org/W4386072068","https://openalex.org/W252339960","https://openalex.org/W2390529043","https://openalex.org/W2378320433","https://openalex.org/W2358343511","https://openalex.org/W2051877971","https://openalex.org/W1970117064","https://openalex.org/W1787170397","https://openalex.org/W4292347844"],"abstract_inverted_index":{"Information":[0],"Extraction":[1],"(IE)":[2],"pipelines":[3,25,88,97],"analyze":[4,32],"text":[5],"through":[6],"several":[7],"stages.":[8],"The":[9],"pipeline's":[10,53,136],"algorithms":[11],"determine":[12],"both":[13],"its":[14,17],"effectiveness":[15],"and":[16],"run-time":[18,84],"efficiency.":[19],"In":[20,76],"real-world":[21],"tasks,":[22],"however,":[23],"IE":[24,67,87],"often":[26],"fail":[27],"acceptable":[28],"run-times":[29],"because":[30],"they":[31],"too":[33],"much":[34,44],"task-irrelevant":[35],"text.":[36],"This":[37],"raises":[38],"two":[39],"interesting":[40],"questions:":[41],"1)":[42],"How":[43],"\"efficiency":[45],"potential\"":[46],"depends":[47],"on":[48,102],"the":[49,83,107],"scheduling":[50],"of":[51,86,93,109,124,129],"a":[52,61,90,116,135],"algorithms?":[54],"2)":[55],"Is":[56],"it":[57],"possible":[58,132],"to":[59,64,81,126],"devise":[60],"reliable":[62],"method":[63],"construct":[65],"efficient":[66],"pipelines?":[68],"Both":[69],"questions":[70],"are":[71,131],"addressed":[72],"in":[73],"this":[74],"paper.":[75],"particular,":[77],"we":[78,119],"show":[79],"how":[80],"optimize":[82],"efficiency":[85,122],"under":[89],"given":[91],"set":[92],"algorithms.":[94],"We":[95],"evaluate":[96],"for":[98],"three":[99],"algorithm":[100],"sets":[101],"an":[103],"industrially":[104],"relevant":[105],"task:":[106],"extraction":[108],"market":[110],"forecasts":[111],"from":[112],"news":[113],"articles.":[114],"Using":[115],"system-independent":[117],"measure,":[118],"demonstrate":[120],"that":[121],"gains":[123],"up":[125],"one":[127],"order":[128],"magnitude":[130],"without":[133],"compromising":[134],"original":[137],"effectiveness.":[138]},"counts_by_year":[{"year":2020,"cited_by_count":2},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":1},{"year":2014,"cited_by_count":3},{"year":2013,"cited_by_count":3},{"year":2012,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
