{"id":"https://openalex.org/W2121940249","doi":"https://doi.org/10.3115/1034678.1034679","title":"Untangling text data mining","display_name":"Untangling text data mining","publication_year":1999,"publication_date":"1999-01-01","ids":{"openalex":"https://openalex.org/W2121940249","doi":"https://doi.org/10.3115/1034678.1034679","mag":"2121940249"},"language":"en","primary_location":{"id":"doi:10.3115/1034678.1034679","is_oa":true,"landing_page_url":"https://doi.org/10.3115/1034678.1034679","pdf_url":"https://dl.acm.org/doi/pdf/10.3115/1034678.1034679","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 37th annual meeting of the Association for Computational Linguistics on Computational Linguistics  -","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.3115/1034678.1034679","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5019933387","display_name":"Marti A. Hearst","orcid":"https://orcid.org/0000-0002-4346-1603"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Marti A. Hearst","raw_affiliation_strings":["University of California, Berkeley, Berkeley, CA","University of California\u2014Berkeley , Berkeley, CA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of California, Berkeley, Berkeley, CA","institution_ids":["https://openalex.org/I95457486"]},{"raw_affiliation_string":"University of California\u2014Berkeley , Berkeley, CA","institution_ids":["https://openalex.org/I95457486"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5019933387"],"corresponding_institution_ids":["https://openalex.org/I95457486"],"apc_list":null,"apc_paid":null,"fwci":30.1944,"has_fulltext":true,"cited_by_count":859,"citation_normalized_percentile":{"value":0.99710753,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"3","last_page":"10"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/decipher","display_name":"DECIPHER","score":0.8555763959884644},{"id":"https://openalex.org/keywords/conflation","display_name":"Conflation","score":0.8084381818771362},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6755530834197998},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5557010769844055},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.49543559551239014},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.38095730543136597},{"id":"https://openalex.org/keywords/bioinformatics","display_name":"Bioinformatics","score":0.11558997631072998},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.10637915134429932},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.06829950213432312}],"concepts":[{"id":"https://openalex.org/C164614171","wikidata":"https://www.wikidata.org/wiki/Q5204775","display_name":"DECIPHER","level":2,"score":0.8555763959884644},{"id":"https://openalex.org/C130440534","wikidata":"https://www.wikidata.org/wiki/Q14946528","display_name":"Conflation","level":2,"score":0.8084381818771362},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6755530834197998},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5557010769844055},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.49543559551239014},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.38095730543136597},{"id":"https://openalex.org/C60644358","wikidata":"https://www.wikidata.org/wiki/Q128570","display_name":"Bioinformatics","level":1,"score":0.11558997631072998},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.10637915134429932},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.06829950213432312},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":6,"locations":[{"id":"doi:10.3115/1034678.1034679","is_oa":true,"landing_page_url":"https://doi.org/10.3115/1034678.1034679","pdf_url":"https://dl.acm.org/doi/pdf/10.3115/1034678.1034679","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 37th annual meeting of the Association for Computational Linguistics on Computational Linguistics  -","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.104.4522","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.104.4522","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://liris.cnrs.fr/~amille/enseignements/master_ia/Alain/exposes_2005/Untangling_text_Data_Mining.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.31.1393","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.31.1393","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.sims.berkeley.edu/~hearst/papers/acl99/acl99.ps","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.99.8510","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.99.8510","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://ece.ut.ac.ir/dbrg/seminars/Vazifedoust-Bayat/Alireza/Read/UntanglingTextDataMining.pdf","raw_type":"text"},{"id":"pmh:oai:acl.sr.language-archives.org:P99-1001","is_oa":false,"landing_page_url":"http://www.aclweb.org/anthology/P99-1001","pdf_url":null,"source":{"id":"https://openalex.org/S4306402137","display_name":"The COCOON platform (University of Paris)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I102197404","host_organization_name":"Universit\u00e9 Paris-Sud","host_organization_lineage":["https://openalex.org/I102197404"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":""},{"id":"pmh:oai:americanae.aecid.es:3673727","is_oa":false,"landing_page_url":"http://americanae.aecid.es/americanae/es/registros/registro.do?tipoRegistro=MTD&idBib=3673727","pdf_url":null,"source":{"id":"https://openalex.org/S4306400786","display_name":"Americanae (AECID Library)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":""}],"best_oa_location":{"id":"doi:10.3115/1034678.1034679","is_oa":true,"landing_page_url":"https://doi.org/10.3115/1034678.1034679","pdf_url":"https://dl.acm.org/doi/pdf/10.3115/1034678.1034679","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 37th annual meeting of the Association for Computational Linguistics on Computational Linguistics  -","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2121940249.pdf","grobid_xml":"https://content.openalex.org/works/W2121940249.grobid-xml"},"referenced_works_count":46,"referenced_works":["https://openalex.org/W41401133","https://openalex.org/W128995279","https://openalex.org/W167430441","https://openalex.org/W1602194043","https://openalex.org/W1660390307","https://openalex.org/W1979459060","https://openalex.org/W1981202432","https://openalex.org/W1996764654","https://openalex.org/W2024030746","https://openalex.org/W2028342611","https://openalex.org/W2032739348","https://openalex.org/W2038721957","https://openalex.org/W2045666897","https://openalex.org/W2074011351","https://openalex.org/W2074439471","https://openalex.org/W2080838006","https://openalex.org/W2082751088","https://openalex.org/W2105106523","https://openalex.org/W2112378479","https://openalex.org/W2116401686","https://openalex.org/W2122978558","https://openalex.org/W2127163275","https://openalex.org/W2132736871","https://openalex.org/W2135909747","https://openalex.org/W2138621811","https://openalex.org/W2138745909","https://openalex.org/W2140319639","https://openalex.org/W2141018272","https://openalex.org/W2143525943","https://openalex.org/W2145059330","https://openalex.org/W2147881096","https://openalex.org/W2154011161","https://openalex.org/W2167044614","https://openalex.org/W2169353895","https://openalex.org/W2319794630","https://openalex.org/W2611194857","https://openalex.org/W3015789953","https://openalex.org/W4231856373","https://openalex.org/W4235160186","https://openalex.org/W4236112757","https://openalex.org/W4236329806","https://openalex.org/W4237926422","https://openalex.org/W4241122026","https://openalex.org/W4243886826","https://openalex.org/W4256358309","https://openalex.org/W4285719527"],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W2058252362","https://openalex.org/W2186092498","https://openalex.org/W2115350162","https://openalex.org/W1982687909","https://openalex.org/W2352149790","https://openalex.org/W2008166176","https://openalex.org/W1984630168","https://openalex.org/W2373256103","https://openalex.org/W2330145053"],"abstract_inverted_index":{"The":[0],"possibilities":[1],"for":[2,34],"data":[3,44],"mining":[4,45],"from":[5],"large":[6],"text":[7,43,69],"collections":[8],"are":[9],"virtually":[10],"untapped.":[11],"Text":[12],"expresses":[13],"a":[14,25],"vast,":[15],"rich":[16],"range":[17],"of":[18,68],"information,":[19],"but":[20],"encodes":[21],"this":[22,35],"information":[23,61],"in":[24,42],"form":[26],"that":[27],"is":[28],"difficult":[29],"to":[30,46,71],"decipher":[31],"automatically.":[32],"Perhaps":[33],"reason,":[36],"there":[37],"has":[38],"been":[39],"little":[40],"work":[41],"date,":[47],"and":[48],"most":[49],"people":[50],"who":[51],"have":[52,56,64],"talked":[53],"about":[54],"it":[55,59],"either":[57],"conflated":[58],"with":[60],"access":[62],"or":[63],"not":[65],"made":[66],"use":[67],"directly":[70],"discover":[72],"heretofore":[73],"unknown":[74],"information.":[75]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":12},{"year":2024,"cited_by_count":16},{"year":2023,"cited_by_count":26},{"year":2022,"cited_by_count":31},{"year":2021,"cited_by_count":36},{"year":2020,"cited_by_count":35},{"year":2019,"cited_by_count":33},{"year":2018,"cited_by_count":28},{"year":2017,"cited_by_count":49},{"year":2016,"cited_by_count":26},{"year":2015,"cited_by_count":33},{"year":2014,"cited_by_count":45},{"year":2013,"cited_by_count":29},{"year":2012,"cited_by_count":29}],"updated_date":"2026-05-09T13:55:54.758798","created_date":"2025-10-10T00:00:00"}
