{"id":"https://openalex.org/W1987411183","doi":"https://doi.org/10.3115/1072017.1072019","title":"Corpora and data preparation","display_name":"Corpora and data preparation","publication_year":1993,"publication_date":"1993-01-01","ids":{"openalex":"https://openalex.org/W1987411183","doi":"https://doi.org/10.3115/1072017.1072019","mag":"1987411183"},"language":"en","primary_location":{"id":"doi:10.3115/1072017.1072019","is_oa":true,"landing_page_url":"https://doi.org/10.3115/1072017.1072019","pdf_url":"https://dl.acm.org/doi/pdf/10.3115/1072017.1072019","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 5th conference on Message understanding  - MUC5 '93","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.3115/1072017.1072019","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5029490430","display_name":"Lynn Carlson","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lynn Carlson","raw_affiliation_strings":["Ft. Meade, MD","Ft. Meade, MD#TAB#"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Ft. Meade, MD","institution_ids":[]},{"raw_affiliation_string":"Ft. Meade, MD#TAB#","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083981773","display_name":"Boyan Onyshkevych","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Boyan Onyshkevych","raw_affiliation_strings":["Ft. Meade, MD","Ft. Meade, MD#TAB#"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Ft. Meade, MD","institution_ids":[]},{"raw_affiliation_string":"Ft. Meade, MD#TAB#","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5007297992","display_name":"Mary Ellen Okurowski","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mary Ellen Okurowski","raw_affiliation_strings":["Ft. Meade, MD","Ft. Meade, MD#TAB#"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Ft. Meade, MD","institution_ids":[]},{"raw_affiliation_string":"Ft. Meade, MD#TAB#","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.9208,"has_fulltext":true,"cited_by_count":5,"citation_normalized_percentile":{"value":0.85591229,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"1"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13681","display_name":"Engineering and Information Technology","score":0.7333999872207642,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13681","display_name":"Engineering and Information Technology","score":0.7333999872207642,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.776258111000061},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.7264269590377808},{"id":"https://openalex.org/keywords/government","display_name":"Government (linguistics)","score":0.617005467414856},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.6033079624176025},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5219549536705017},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.4890199601650238},{"id":"https://openalex.org/keywords/data-extraction","display_name":"Data extraction","score":0.4851456582546234},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.43834513425827026},{"id":"https://openalex.org/keywords/software-engineering","display_name":"Software engineering","score":0.37666141986846924},{"id":"https://openalex.org/keywords/knowledge-management","display_name":"Knowledge management","score":0.34468188881874084},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.24775826930999756},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.09922716021537781}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.776258111000061},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.7264269590377808},{"id":"https://openalex.org/C2778137410","wikidata":"https://www.wikidata.org/wiki/Q2732820","display_name":"Government (linguistics)","level":2,"score":0.617005467414856},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.6033079624176025},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5219549536705017},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.4890199601650238},{"id":"https://openalex.org/C2777466982","wikidata":"https://www.wikidata.org/wiki/Q5227287","display_name":"Data extraction","level":3,"score":0.4851456582546234},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.43834513425827026},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.37666141986846924},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.34468188881874084},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.24775826930999756},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.09922716021537781},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C111368507","wikidata":"https://www.wikidata.org/wiki/Q43518","display_name":"Oceanography","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C2779473830","wikidata":"https://www.wikidata.org/wiki/Q1540899","display_name":"MEDLINE","level":2,"score":0.0},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.3115/1072017.1072019","is_oa":true,"landing_page_url":"https://doi.org/10.3115/1072017.1072019","pdf_url":"https://dl.acm.org/doi/pdf/10.3115/1072017.1072019","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 5th conference on Message understanding  - MUC5 '93","raw_type":"proceedings-article"},{"id":"pmh:ADA460923","is_oa":false,"landing_page_url":"http://oai.dtic.mil/oai/oai?&amp;verb=getRecord&amp;metadataPrefix=html&amp;identifier=ADA460923","pdf_url":null,"source":{"id":"https://openalex.org/S4406923043","display_name":"Defense Technical Information Center (DTIC)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"DTIC","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.3115/1072017.1072019","is_oa":true,"landing_page_url":"https://doi.org/10.3115/1072017.1072019","pdf_url":"https://dl.acm.org/doi/pdf/10.3115/1072017.1072019","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 5th conference on Message understanding  - MUC5 '93","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W1987411183.pdf","grobid_xml":"https://content.openalex.org/works/W1987411183.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2383111961","https://openalex.org/W2365952365","https://openalex.org/W2352448290","https://openalex.org/W2380820513","https://openalex.org/W2913146933","https://openalex.org/W2372385138","https://openalex.org/W4296359239","https://openalex.org/W2101155126","https://openalex.org/W2043093291","https://openalex.org/W4225580798"],"abstract_inverted_index":{"The":[0,26],"data":[1,4,45,60,145],"selection":[2,31],"and":[3,12,24,32,63,81,85,103,108,121,147],"preparation":[5,33,146],"efforts":[6,34,126,137],"which":[7],"led":[8],"to":[9,29,42,56,61,72,79,87,124],"the":[10,49,89,118,135,139],"TIPSTER":[11,38],"Fifth":[13],"Message":[14],"Understanding":[15],"Conference":[16],"(MUC-5)":[17],"evaluation":[18],"corpora":[19],"involved":[20],"substantial":[21],"effort,":[22],"time":[23],"resources.":[25],"Government":[27,129],"commitment":[28,114],"these":[30,125],"stems":[35],"from":[36,127],"four":[37],"Program":[39],"objectives:":[40],"(1)":[41],"provide":[43,57,73],"training":[44],"that":[46],"would":[47],"promote":[48],"development":[50],"of":[51,101],"information":[52],"extraction":[53],"technology,":[54],"(2)":[55],"accurate":[58],"test":[59],"evaluate":[62],"baseline":[64,75],"system":[65],"performance":[66,78],"in":[67,105],"an":[68],"objective":[69],"manner,":[70],"(3)":[71],"a":[74,98],"for":[76,141,144,152],"human":[77],"understand":[80],"interpret":[82],"machine":[83],"performance,":[84],"(4)":[86],"support":[88,123],"larger":[90],"Natural":[91],"Language":[92],"Processing":[93],"community":[94],"by":[95],"making":[96],"available":[97],"unique":[99],"set":[100],"texts":[102],"templates":[104],"multiple":[106],"domains":[107],"languages":[109],"under":[110],"ARPA":[111],"support.":[112],"This":[113],"was":[115],"demonstrated":[116],"through":[117,134],"managerial,":[119],"technical,":[120],"administrative":[122],"various":[128],"agencies,":[130],"as":[131,133],"well":[132],"contractual":[136],"with":[138],"Institute":[140],"Defense":[142],"Analyses":[143],"New":[148],"Mexico":[149],"State":[150],"University":[151],"software":[153],"tool":[154],"development.":[155]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
