{"id":"https://openalex.org/W2118778987","doi":"https://doi.org/10.3115/1219840.1219886","title":"Unsupervised learning of field segmentation models for information extraction","display_name":"Unsupervised learning of field segmentation models for information extraction","publication_year":2005,"publication_date":"2005-01-01","ids":{"openalex":"https://openalex.org/W2118778987","doi":"https://doi.org/10.3115/1219840.1219886","mag":"2118778987"},"language":"en","primary_location":{"id":"doi:10.3115/1219840.1219886","is_oa":true,"landing_page_url":"https://doi.org/10.3115/1219840.1219886","pdf_url":"https://dl.acm.org/doi/pdf/10.3115/1219840.1219886","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 43rd Annual Meeting on Association for Computational Linguistics  - ACL '05","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.3115/1219840.1219886","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5002013404","display_name":"Trond Grenager","orcid":null},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Trond Grenager","raw_affiliation_strings":["Stanford University, Stanford, CA","Stanford University Stanford CA"],"affiliations":[{"raw_affiliation_string":"Stanford University, Stanford, CA","institution_ids":["https://openalex.org/I97018004"]},{"raw_affiliation_string":"Stanford University Stanford CA","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004921249","display_name":"Dan Klein","orcid":"https://orcid.org/0000-0002-8881-1902"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dan Klein","raw_affiliation_strings":["U.C. Berkeley, Berkeley, CA","U.C. Berkeley, Berkeley, CA#TAB#"],"affiliations":[{"raw_affiliation_string":"U.C. Berkeley, Berkeley, CA","institution_ids":["https://openalex.org/I95457486"]},{"raw_affiliation_string":"U.C. Berkeley, Berkeley, CA#TAB#","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5046006076","display_name":"Christopher D. Manning","orcid":"https://orcid.org/0000-0001-6155-649X"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Christopher D. Manning","raw_affiliation_strings":["Stanford University, Stanford, CA","Stanford University Stanford CA"],"affiliations":[{"raw_affiliation_string":"Stanford University, Stanford, CA","institution_ids":["https://openalex.org/I97018004"]},{"raw_affiliation_string":"Stanford University Stanford CA","institution_ids":["https://openalex.org/I97018004"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5002013404"],"corresponding_institution_ids":["https://openalex.org/I97018004"],"apc_list":null,"apc_paid":null,"fwci":7.7638,"has_fulltext":true,"cited_by_count":75,"citation_normalized_percentile":{"value":0.97226456,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"371","last_page":"378"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7762221097946167},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.7234914898872375},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7027285099029541},{"id":"https://openalex.org/keywords/unsupervised-learning","display_name":"Unsupervised learning","score":0.6901576519012451},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.5927596092224121},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.5840883851051331},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5811928510665894},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4647478461265564},{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.4564746022224426},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.4353313148021698},{"id":"https://openalex.org/keywords/supervised-learning","display_name":"Supervised learning","score":0.4276910424232483},{"id":"https://openalex.org/keywords/simple","display_name":"Simple (philosophy)","score":0.416176438331604},{"id":"https://openalex.org/keywords/labeled-data","display_name":"Labeled data","score":0.41315385699272156},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.19699889421463013},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.11953821778297424}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7762221097946167},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.7234914898872375},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7027285099029541},{"id":"https://openalex.org/C8038995","wikidata":"https://www.wikidata.org/wiki/Q1152135","display_name":"Unsupervised learning","level":2,"score":0.6901576519012451},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.5927596092224121},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.5840883851051331},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5811928510665894},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4647478461265564},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.4564746022224426},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.4353313148021698},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.4276910424232483},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.416176438331604},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.41315385699272156},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.19699889421463013},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.11953821778297424},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.3115/1219840.1219886","is_oa":true,"landing_page_url":"https://doi.org/10.3115/1219840.1219886","pdf_url":"https://dl.acm.org/doi/pdf/10.3115/1219840.1219886","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 43rd Annual Meeting on Association for Computational Linguistics  - ACL '05","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.122.8127","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.122.8127","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://acl.ldc.upenn.edu/P/P05/P05-1046.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.75.6612","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.75.6612","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://nlp.stanford.edu/cmanning/papers/unsupie_final.ps","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.77.668","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.77.668","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.stanford.edu/~grenager/papers/unsupie_final.ps","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.79.350","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.79.350","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://nlp.stanford.edu/cmanning/papers/unsupie_final.pdf","raw_type":"text"}],"best_oa_location":{"id":"doi:10.3115/1219840.1219886","is_oa":true,"landing_page_url":"https://doi.org/10.3115/1219840.1219886","pdf_url":"https://dl.acm.org/doi/pdf/10.3115/1219840.1219886","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 43rd Annual Meeting on Association for Computational Linguistics  - ACL '05","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.7099999785423279,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2118778987.pdf","grobid_xml":"https://content.openalex.org/works/W2118778987.grobid-xml"},"referenced_works_count":14,"referenced_works":["https://openalex.org/W1534730506","https://openalex.org/W1560013842","https://openalex.org/W1608864735","https://openalex.org/W1699498167","https://openalex.org/W1828401780","https://openalex.org/W1934019294","https://openalex.org/W1999337483","https://openalex.org/W2053569739","https://openalex.org/W2118370253","https://openalex.org/W2125838338","https://openalex.org/W2159481891","https://openalex.org/W2949952668","https://openalex.org/W4235768290","https://openalex.org/W4253573210"],"related_works":["https://openalex.org/W3148060700","https://openalex.org/W3080681248","https://openalex.org/W4376646226","https://openalex.org/W3047177827","https://openalex.org/W4287685660","https://openalex.org/W2057778272","https://openalex.org/W4319302697","https://openalex.org/W2986085304","https://openalex.org/W2163389298","https://openalex.org/W2794908468"],"abstract_inverted_index":{"The":[0],"applicability":[1],"of":[2,36,76,86,95,133,136],"many":[3],"current":[4],"information":[5],"extraction":[6,25],"techniques":[7],"is":[8],"severely":[9],"limited":[10],"by":[11,90,118],"the":[12,84,87,96],"need":[13],"for":[14,21,61],"supervised":[15,119],"training":[16],"data.":[17,138],"We":[18],"demonstrate":[19],"that":[20,104,126],"certain":[22],"field":[23,62],"structured":[24,63],"tasks,":[26],"such":[27],"as":[28],"classified":[29],"advertisements":[30],"and":[31,125],"bibliographic":[32],"citations,":[33],"small":[34,134],"amounts":[35,135],"prior":[37,93],"knowledge":[38,94],"can":[39,81,107,129],"be":[40],"used":[41],"to":[42,70,115],"learn":[43,71],"effective":[44],"models":[45,54],"in":[46,74],"a":[47,57],"primarily":[48],"unsupervised":[49,66,105],"fashion.":[50],"Although":[51],"hidden":[52],"Markov":[53],"(HMMs)":[55],"provide":[56],"suitable":[58],"generative":[59],"model":[60],"text,":[64],"general":[65],"HMM":[67],"learning":[68],"fails":[69],"useful":[72],"structure":[73,89],"either":[75],"our":[77],"domains.":[78],"However,":[79],"one":[80],"dramatically":[82],"improve":[83],"quality":[85],"learned":[88],"exploiting":[91],"simple":[92],"desired":[97],"solutions.":[98],"In":[99],"both":[100],"domains,":[101],"we":[102],"found":[103],"methods":[106,120,128],"attain":[108],"accuracies":[109],"with":[110],"400":[111],"unlabeled":[112],"examples":[113],"comparable":[114],"those":[116],"attained":[117],"on":[121],"50":[122],"labeled":[123,137],"examples,":[124],"semi-supervised":[127],"make":[130],"good":[131],"use":[132]},"counts_by_year":[{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2017,"cited_by_count":3},{"year":2016,"cited_by_count":1},{"year":2015,"cited_by_count":3},{"year":2014,"cited_by_count":4},{"year":2013,"cited_by_count":3},{"year":2012,"cited_by_count":8}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
