{"id":"https://openalex.org/W1978833300","doi":"https://doi.org/10.3115/1117794.1117817","title":"Combining lexical and formatting cues for named entity acquisition from the web","display_name":"Combining lexical and formatting cues for named entity acquisition from the web","publication_year":2000,"publication_date":"2000-01-01","ids":{"openalex":"https://openalex.org/W1978833300","doi":"https://doi.org/10.3115/1117794.1117817","mag":"1978833300"},"language":"en","primary_location":{"id":"doi:10.3115/1117794.1117817","is_oa":true,"landing_page_url":"https://doi.org/10.3115/1117794.1117817","pdf_url":"https://dl.acm.org/doi/pdf/10.3115/1117794.1117817","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2000 Joint SIGDAT conference on Empirical methods in natural language processing and very large corpora held in conjunction with the 38th Annual Meeting of the Association for Computational Linguistics -","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.3115/1117794.1117817","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5014763438","display_name":"Christian Jacquemin","orcid":null},"institutions":[{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"funder","lineage":["https://openalex.org/I1294671590"]},{"id":"https://openalex.org/I4210115485","display_name":"Laboratoire d'Informatique pour la M\u00e9canique et les Sciences de l'Ing\u00e9nieur","ror":"https://ror.org/01raq4x89","country_code":"FR","type":"facility","lineage":["https://openalex.org/I102197404","https://openalex.org/I1294671590","https://openalex.org/I4210115485","https://openalex.org/I4210159245"]}],"countries":["FR"],"is_corresponding":true,"raw_author_name":"Christian Jacquemin","raw_affiliation_strings":["CNRS-LIMSI, ORSAY Cedex, France"],"affiliations":[{"raw_affiliation_string":"CNRS-LIMSI, ORSAY Cedex, France","institution_ids":["https://openalex.org/I4210115485","https://openalex.org/I1294671590"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5044358853","display_name":"Caroline Bush","orcid":null},"institutions":[{"id":"https://openalex.org/I4210115485","display_name":"Laboratoire d'Informatique pour la M\u00e9canique et les Sciences de l'Ing\u00e9nieur","ror":"https://ror.org/01raq4x89","country_code":"FR","type":"facility","lineage":["https://openalex.org/I102197404","https://openalex.org/I1294671590","https://openalex.org/I4210115485","https://openalex.org/I4210159245"]},{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"funder","lineage":["https://openalex.org/I1294671590"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Caroline Bush","raw_affiliation_strings":["CNRS-LIMSI, ORSAY Cedex, France"],"affiliations":[{"raw_affiliation_string":"CNRS-LIMSI, ORSAY Cedex, France","institution_ids":["https://openalex.org/I4210115485","https://openalex.org/I1294671590"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5014763438"],"corresponding_institution_ids":["https://openalex.org/I1294671590","https://openalex.org/I4210115485"],"apc_list":null,"apc_paid":null,"fwci":1.5914,"has_fulltext":true,"cited_by_count":12,"citation_normalized_percentile":{"value":0.86197084,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"13","issue":null,"first_page":"181","last_page":"189"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9919999837875366,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/disk-formatting","display_name":"Disk formatting","score":0.9611893892288208},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8506252765655518},{"id":"https://openalex.org/keywords/parsing","display_name":"Parsing","score":0.762069582939148},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5900790095329285},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4418693780899048},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.42741796374320984},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.34950369596481323},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.0957222580909729}],"concepts":[{"id":"https://openalex.org/C88006597","wikidata":"https://www.wikidata.org/wiki/Q690117","display_name":"Disk formatting","level":2,"score":0.9611893892288208},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8506252765655518},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.762069582939148},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5900790095329285},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4418693780899048},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.42741796374320984},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.34950369596481323},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0957222580909729}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.3115/1117794.1117817","is_oa":true,"landing_page_url":"https://doi.org/10.3115/1117794.1117817","pdf_url":"https://dl.acm.org/doi/pdf/10.3115/1117794.1117817","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2000 Joint SIGDAT conference on Empirical methods in natural language processing and very large corpora held in conjunction with the 38th Annual Meeting of the Association for Computational Linguistics -","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.12.2812","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.12.2812","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://acl.ldc.upenn.edu/W/W00/W00-1323.pdf","raw_type":"text"}],"best_oa_location":{"id":"doi:10.3115/1117794.1117817","is_oa":true,"landing_page_url":"https://doi.org/10.3115/1117794.1117817","pdf_url":"https://dl.acm.org/doi/pdf/10.3115/1117794.1117817","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2000 Joint SIGDAT conference on Empirical methods in natural language processing and very large corpora held in conjunction with the 38th Annual Meeting of the Association for Computational Linguistics -","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.5600000023841858}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W1978833300.pdf","grobid_xml":"https://content.openalex.org/works/W1978833300.grobid-xml"},"referenced_works_count":13,"referenced_works":["https://openalex.org/W36303344","https://openalex.org/W128995279","https://openalex.org/W1522263329","https://openalex.org/W1553019137","https://openalex.org/W1607576325","https://openalex.org/W2028567134","https://openalex.org/W2087926223","https://openalex.org/W2113873049","https://openalex.org/W2163953154","https://openalex.org/W2270423142","https://openalex.org/W2890387018","https://openalex.org/W3088846185","https://openalex.org/W4285719527"],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W4244466418","https://openalex.org/W2104062382","https://openalex.org/W2162878363","https://openalex.org/W2389021890","https://openalex.org/W2479325685","https://openalex.org/W4381248170","https://openalex.org/W3189621521","https://openalex.org/W2173794830","https://openalex.org/W1502858101"],"abstract_inverted_index":{"Because":[0],"of":[1,28],"their":[2],"constant":[3],"renewal,":[4],"it":[5],"is":[6],"necessary":[7],"to":[8,43],"acquire":[9],"fresh":[10],"named":[11],"entities":[12],"(NEs)":[13],"from":[14,30],"recent":[15],"text":[16],"sources.":[17],"We":[18],"present":[19],"a":[20,35],"tool":[21],"for":[22,64],"the":[23,26,31],"acquisition":[24],"and":[25,37,48,67],"typing":[27],"NEs":[29],"Web":[32],"that":[33],"associates":[34],"harvester":[36],"three":[38],"parallel":[39],"shallow":[40],"parsers":[41,51],"dedicated":[42],"specific":[44],"structures":[45],"(lists,":[46],"enumerations,":[47],"anchors).":[49],"The":[50],"combine":[52],"lexical":[53],"indices":[54],"such":[55],"as":[56],"discourse":[57],"markers":[58],"with":[59],"formatting":[60],"instructions":[61],"(HTML":[62],"tags)":[63],"analyzing":[65],"enumerations":[66],"associated":[68],"initializers.":[69]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
