{"id":"https://openalex.org/W2051289496","doi":"https://doi.org/10.1093/llc/fqm010","title":"Discovery of Language Resources on the Web: Information Extraction from Heterogeneous Documents","display_name":"Discovery of Language Resources on the Web: Information Extraction from Heterogeneous Documents","publication_year":2007,"publication_date":"2007-04-20","ids":{"openalex":"https://openalex.org/W2051289496","doi":"https://doi.org/10.1093/llc/fqm010","mag":"2051289496"},"language":"en","primary_location":{"id":"doi:10.1093/llc/fqm010","is_oa":false,"landing_page_url":"https://doi.org/10.1093/llc/fqm010","pdf_url":null,"source":{"id":"https://openalex.org/S84784070","display_name":"Literary and Linguistic Computing","issn_l":"0268-1145","issn":["0268-1145","1477-4615"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311648","https://openalex.org/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Literary and Linguistic Computing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5032186518","display_name":"Vladim\u00edr Pekar","orcid":null},"institutions":[{"id":"https://openalex.org/I119664326","display_name":"University of Wolverhampton","ror":"https://ror.org/01k2y1055","country_code":"GB","type":"education","lineage":["https://openalex.org/I119664326"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"V. Pekar","raw_affiliation_strings":["School of Humanities, Languages, and Social Sciences, University of Wolverhampton, Stafford Street, Wolverhampton, WV1 1SB, UK"],"affiliations":[{"raw_affiliation_string":"School of Humanities, Languages, and Social Sciences, University of Wolverhampton, Stafford Street, Wolverhampton, WV1 1SB, UK","institution_ids":["https://openalex.org/I119664326"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5007426493","display_name":"Richard Evans","orcid":"https://orcid.org/0000-0002-1220-8605"},"institutions":[{"id":"https://openalex.org/I119664326","display_name":"University of Wolverhampton","ror":"https://ror.org/01k2y1055","country_code":"GB","type":"education","lineage":["https://openalex.org/I119664326"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"R. Evans","raw_affiliation_strings":["School of Humanities, Languages, and Social Sciences, University of Wolverhampton, Stafford Street, Wolverhampton, WV1 1SB, UK"],"affiliations":[{"raw_affiliation_string":"School of Humanities, Languages, and Social Sciences, University of Wolverhampton, Stafford Street, Wolverhampton, WV1 1SB, UK","institution_ids":["https://openalex.org/I119664326"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5032186518"],"corresponding_institution_ids":["https://openalex.org/I119664326"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.11216954,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"22","issue":"3","first_page":"329","last_page":"343"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9944999814033508,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9886000156402588,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8720966577529907},{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.7189750671386719},{"id":"https://openalex.org/keywords/web-crawler","display_name":"Web crawler","score":0.6440648436546326},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.6059224605560303},{"id":"https://openalex.org/keywords/web-page","display_name":"Web page","score":0.5980049967765808},{"id":"https://openalex.org/keywords/coreference","display_name":"Coreference","score":0.5925813913345337},{"id":"https://openalex.org/keywords/terminology","display_name":"Terminology","score":0.5718796253204346},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5645005702972412},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.5260722041130066},{"id":"https://openalex.org/keywords/named-entity-recognition","display_name":"Named-entity recognition","score":0.5164473056793213},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.45697176456451416},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.41991811990737915},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.4188763499259949},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.387284517288208},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3489900231361389},{"id":"https://openalex.org/keywords/resolution","display_name":"Resolution (logic)","score":0.2911677956581116}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8720966577529907},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.7189750671386719},{"id":"https://openalex.org/C13743948","wikidata":"https://www.wikidata.org/wiki/Q45842","display_name":"Web crawler","level":2,"score":0.6440648436546326},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.6059224605560303},{"id":"https://openalex.org/C21959979","wikidata":"https://www.wikidata.org/wiki/Q36774","display_name":"Web page","level":2,"score":0.5980049967765808},{"id":"https://openalex.org/C28076734","wikidata":"https://www.wikidata.org/wiki/Q63087","display_name":"Coreference","level":3,"score":0.5925813913345337},{"id":"https://openalex.org/C547195049","wikidata":"https://www.wikidata.org/wiki/Q1725664","display_name":"Terminology","level":2,"score":0.5718796253204346},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5645005702972412},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.5260722041130066},{"id":"https://openalex.org/C2779135771","wikidata":"https://www.wikidata.org/wiki/Q403574","display_name":"Named-entity recognition","level":3,"score":0.5164473056793213},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.45697176456451416},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.41991811990737915},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.4188763499259949},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.387284517288208},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3489900231361389},{"id":"https://openalex.org/C138268822","wikidata":"https://www.wikidata.org/wiki/Q1051925","display_name":"Resolution (logic)","level":2,"score":0.2911677956581116},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1093/llc/fqm010","is_oa":false,"landing_page_url":"https://doi.org/10.1093/llc/fqm010","pdf_url":null,"source":{"id":"https://openalex.org/S84784070","display_name":"Literary and Linguistic Computing","issn_l":"0268-1145","issn":["0268-1145","1477-4615"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311648","https://openalex.org/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Literary and Linguistic Computing","raw_type":"journal-article"},{"id":"pmh:oai:wlv.openrepository.com:2436/15899","is_oa":false,"landing_page_url":"http://hdl.handle.net/2436/15899","pdf_url":null,"source":{"id":"https://openalex.org/S4306401000","display_name":"Wolverhampton Intellectual Repository and E-Theses (University of Wolverhampton)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I119664326","host_organization_name":"University of Wolverhampton","host_organization_lineage":["https://openalex.org/I119664326"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5099999904632568,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W4508078","https://openalex.org/W1553019137","https://openalex.org/W1934019294","https://openalex.org/W1975175514","https://openalex.org/W1986398135","https://openalex.org/W1994485670","https://openalex.org/W2012179495","https://openalex.org/W2026080185","https://openalex.org/W2044070623","https://openalex.org/W2059933135","https://openalex.org/W2093559286","https://openalex.org/W2118020653","https://openalex.org/W2123504579","https://openalex.org/W2143349571","https://openalex.org/W2149430911","https://openalex.org/W2151823253","https://openalex.org/W2162340487","https://openalex.org/W4285719527"],"related_works":["https://openalex.org/W4385695127","https://openalex.org/W2389761961","https://openalex.org/W4254300012","https://openalex.org/W2963706618","https://openalex.org/W2113184419","https://openalex.org/W1999548128","https://openalex.org/W2102475112","https://openalex.org/W2916255597","https://openalex.org/W1521215947","https://openalex.org/W3091569222"],"abstract_inverted_index":{"The":[0],"present":[1,137],"article":[2],"is":[3,90],"concerned":[4],"with":[5,51,100,129,148,185],"the":[6,34,52,81,92,143,167,173,178,192],"problem":[7],"of":[8,36,80,83,94,112,140,145,154,180],"automatic":[9],"database":[10,184],"population":[11],"via":[12],"information":[13,95,168,186],"extraction":[14,169],"(IE)":[15],"from":[16,20,41],"web":[17,84,146],"pages":[18,147],"obtained":[19],"heterogeneous":[21],"sources,":[22],"such":[23,56,151],"as":[24,57,152],"those":[25],"retrieved":[26],"by":[27],"a":[28,44,101,113,138,183],"domain":[29,157],"crawler.":[30],"Specifically,":[31],"we":[32,98],"address":[33,99],"task":[35,179],"filling":[37],"single":[38],"multi-field":[39],"templates":[40],"individual":[42],"documents,":[43],"common":[45],"scenario":[46,71],"that":[47,67,106,115],"involves":[48],"free-format":[49],"documents":[50],"same":[53],"communicative":[54],"goal":[55],"job":[58],"adverts,":[59],"CVs,":[60],"or":[61],"meeting/seminar":[62],"announcements.":[63],"We":[64,135,171],"discuss":[65],"challenges":[66],"arise":[68],"in":[69],"this":[70],"and":[72,122,159,162],"propose":[73],"solutions":[74],"to":[75,109,118,131],"them":[76],"at":[77],"different":[78],"levels":[79],"processing":[82],"page":[85,114],"content.":[86],"Our":[87],"main":[88],"focus":[89],"on":[91,166,177,187,191],"issue":[93],"extraction,":[96],"which":[97,130],"two-step":[102],"machine":[103],"learning":[104],"approach":[105],"first":[107],"aims":[108],"determine":[110],"segments":[111],"are":[116],"likely":[117],"contain":[119],"relevant":[120],"facts":[121],"then":[123],"delimits":[124],"specific":[125],"natural":[126],"language":[127,188],"expressions":[128],"fill":[132],"template":[133],"fields.":[134],"also":[136],"range":[139],"techniques":[141],"for":[142],"enrichment":[144],"semantic":[149],"annotations,":[150],"recognition":[153],"named":[155],"entities,":[156],"terminology":[158],"coreference":[160],"resolution,":[161],"examine":[163],"their":[164],"effect":[165],"method.":[170],"evaluate":[172],"developed":[174],"IE":[175],"system":[176],"automatically":[181],"populating":[182],"resources":[189],"available":[190],"web.":[193]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
