{"id":"https://openalex.org/W1986583960","doi":"https://doi.org/10.1145/1774088.1774471","title":"An adaptive information extraction system based on wrapper induction with POS tagging","display_name":"An adaptive information extraction system based on wrapper induction with POS tagging","publication_year":2010,"publication_date":"2010-03-22","ids":{"openalex":"https://openalex.org/W1986583960","doi":"https://doi.org/10.1145/1774088.1774471","mag":"1986583960"},"language":"en","primary_location":{"id":"doi:10.1145/1774088.1774471","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1774088.1774471","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2010 ACM Symposium on Applied Computing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5021095237","display_name":"Rinaldo Lima","orcid":"https://orcid.org/0000-0002-1388-4824"},"institutions":[{"id":"https://openalex.org/I4210113996","display_name":"Centro Universit\u00e1rio da Cidade","ror":"https://ror.org/032bg8m67","country_code":"BR","type":"education","lineage":["https://openalex.org/I4210113996"]}],"countries":["BR"],"is_corresponding":true,"raw_author_name":"Rinaldo Lima","raw_affiliation_strings":["Cidade Universit\u00e1ria, Recife, PE, Brazil"],"affiliations":[{"raw_affiliation_string":"Cidade Universit\u00e1ria, Recife, PE, Brazil","institution_ids":["https://openalex.org/I4210113996"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109066626","display_name":"Bernard Espinasse","orcid":null},"institutions":[{"id":"https://openalex.org/I4210125985","display_name":"Institut Fresnel","ror":"https://ror.org/03br1wy20","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I1294671590","https://openalex.org/I21491767","https://openalex.org/I4210095849","https://openalex.org/I4210125985","https://openalex.org/I4210142724"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Bernard Espinasse","raw_affiliation_strings":["Domaine Universitaire de St Jer\u00f4me, Marseille Cedex, France"],"affiliations":[{"raw_affiliation_string":"Domaine Universitaire de St Jer\u00f4me, Marseille Cedex, France","institution_ids":["https://openalex.org/I4210125985"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5103176344","display_name":"Fred Freitas","orcid":"https://orcid.org/0000-0003-0425-6786"},"institutions":[{"id":"https://openalex.org/I4210113996","display_name":"Centro Universit\u00e1rio da Cidade","ror":"https://ror.org/032bg8m67","country_code":"BR","type":"education","lineage":["https://openalex.org/I4210113996"]}],"countries":["BR"],"is_corresponding":false,"raw_author_name":"Fred Freitas","raw_affiliation_strings":["Cidade Universit\u00e1ria, Recife, PE, Brazil"],"affiliations":[{"raw_affiliation_string":"Cidade Universit\u00e1ria, Recife, PE, Brazil","institution_ids":["https://openalex.org/I4210113996"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5021095237"],"corresponding_institution_ids":["https://openalex.org/I4210113996"],"apc_list":null,"apc_paid":null,"fwci":1.3874,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.87376693,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"1815","last_page":"1820"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8349553942680359},{"id":"https://openalex.org/keywords/boosting","display_name":"Boosting (machine learning)","score":0.7175491452217102},{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.6127437949180603},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5151209235191345},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.45861271023750305}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8349553942680359},{"id":"https://openalex.org/C46686674","wikidata":"https://www.wikidata.org/wiki/Q466303","display_name":"Boosting (machine learning)","level":2,"score":0.7175491452217102},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.6127437949180603},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5151209235191345},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.45861271023750305}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/1774088.1774471","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1774088.1774471","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2010 ACM Symposium on Applied Computing","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.6800000071525574}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":8,"referenced_works":["https://openalex.org/W61174534","https://openalex.org/W168632859","https://openalex.org/W171478201","https://openalex.org/W196501055","https://openalex.org/W200042785","https://openalex.org/W1847465294","https://openalex.org/W2058316166","https://openalex.org/W2143349571"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2125652721","https://openalex.org/W1540371141","https://openalex.org/W4231274751","https://openalex.org/W1549363203","https://openalex.org/W2154063878","https://openalex.org/W2556012038","https://openalex.org/W3204019825","https://openalex.org/W2368651715"],"abstract_inverted_index":{"Information":[0],"Extraction":[1],"(IE)":[2],"performs":[3],"two":[4],"important":[5],"tasks:":[6],"identifying":[7],"certain":[8],"pieces":[9],"of":[10,51,94,103,105],"information":[11],"from":[12],"documents":[13],"and":[14,97,136],"storing":[15],"them":[16],"for":[17,60,112],"future":[18],"use.":[19],"This":[20,55],"work":[21],"proposes":[22],"an":[23],"adaptive":[24],"IE":[25,125],"system":[26,119],"based":[27],"on":[28,83],"Boosted":[29],"Wrapper":[30],"Induction":[31],"(BWI),":[32],"a":[33,100,133],"supervised":[34],"wrapper":[35],"induction":[36],"algorithm.":[37],"However,":[38],"some":[39],"authors":[40],"have":[41],"shown":[42],"that":[43,91],"boosting":[44],"techniques":[45],"face":[46],"difficulties":[47],"during":[48],"the":[49,58,65,92,108,121,139],"processing":[50],"natural":[52],"language":[53],"texts.":[54,114],"fact":[56],"became":[57],"rationale":[59],"coupling":[61],"Parts-of-Speech":[62],"tagging":[63,96],"with":[64,128],"BWI":[66,98,110],"algorithm":[67,111],"in":[68,138],"our":[69,118],"proposed":[70],"system.":[71],"In":[72],"order":[73],"to":[74,132],"evaluate":[75],"its":[76],"performance,":[77],"several":[78],"experiments":[79],"were":[80],"carried":[81],"out":[82],"three":[84],"standard":[85],"corpora.":[86],"The":[87],"results":[88,116],"obtained":[89],"suggest":[90],"union":[93],"POS":[95,129],"offers":[99],"small":[101],"gain":[102],"3--5%":[104],"performance":[106],"over":[107],"original":[109],"unstructured":[113],"These":[115],"position":[117],"among":[120],"very":[122],"best":[123],"similar":[124],"systems":[126],"endowed":[127],"tagging,":[130],"according":[131],"comparison":[134],"presented":[135],"discussed":[137],"article.":[140]},"counts_by_year":[{"year":2022,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
