{"id":"https://openalex.org/W2169638632","doi":"https://doi.org/10.1109/icdm.2002.1183933","title":"Mining significant associations in large scale text corpora","display_name":"Mining significant associations in large scale text corpora","publication_year":2003,"publication_date":"2003-06-26","ids":{"openalex":"https://openalex.org/W2169638632","doi":"https://doi.org/10.1109/icdm.2002.1183933","mag":"2169638632"},"language":"en","primary_location":{"id":"doi:10.1109/icdm.2002.1183933","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icdm.2002.1183933","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2002 IEEE International Conference on Data Mining, 2002. Proceedings.","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5078491738","display_name":"Prabhakar Raghavan","orcid":"https://orcid.org/0000-0001-9853-7604"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"P. Raghavan","raw_affiliation_strings":["Verity, Inc"],"affiliations":[{"raw_affiliation_string":"Verity, Inc","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5066962453","display_name":"Panayiotis Tsaparas","orcid":"https://orcid.org/0000-0002-3490-1507"},"institutions":[{"id":"https://openalex.org/I185261750","display_name":"University of Toronto","ror":"https://ror.org/03dbr7087","country_code":"CA","type":"education","lineage":["https://openalex.org/I185261750"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"P. Tsaparas","raw_affiliation_strings":["Department of Computer Science, University of Toronto, Canada"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Toronto, Canada","institution_ids":["https://openalex.org/I185261750"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5078491738"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.8004,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.86829244,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"16","issue":null,"first_page":"402","last_page":"409"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11063","display_name":"Rough Sets and Fuzzy Logic","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7638314366340637},{"id":"https://openalex.org/keywords/pruning","display_name":"Pruning","score":0.7423184514045715},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.6440725326538086},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.6097674369812012},{"id":"https://openalex.org/keywords/measure","display_name":"Measure (data warehouse)","score":0.5868856906890869},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.49378690123558044},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4846542477607727},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.48000019788742065},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.45328783988952637},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4411354660987854},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.06959989666938782}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7638314366340637},{"id":"https://openalex.org/C108010975","wikidata":"https://www.wikidata.org/wiki/Q500094","display_name":"Pruning","level":2,"score":0.7423184514045715},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.6440725326538086},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.6097674369812012},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.5868856906890869},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.49378690123558044},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4846542477607727},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48000019788742065},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.45328783988952637},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4411354660987854},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.06959989666938782},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C58640448","wikidata":"https://www.wikidata.org/wiki/Q42515","display_name":"Cartography","level":1,"score":0.0},{"id":"https://openalex.org/C6557445","wikidata":"https://www.wikidata.org/wiki/Q173113","display_name":"Agronomy","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/icdm.2002.1183933","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icdm.2002.1183933","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2002 IEEE International Conference on Data Mining, 2002. Proceedings.","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.7.1211","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.7.1211","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.cs.toronto.edu/~tsap/publications/icdm02.ps","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.90.5537","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.90.5537","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.cs.toronto.edu/~tsap/publications/icdm02.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W58080530","https://openalex.org/W1506285740","https://openalex.org/W1574901103","https://openalex.org/W1592297887","https://openalex.org/W1604803281","https://openalex.org/W1813925448","https://openalex.org/W1940278502","https://openalex.org/W1970161214","https://openalex.org/W1996764654","https://openalex.org/W2037965136","https://openalex.org/W2048779798","https://openalex.org/W2079656678","https://openalex.org/W2083991698","https://openalex.org/W2096322311","https://openalex.org/W2126400629","https://openalex.org/W2144212226","https://openalex.org/W2145296344","https://openalex.org/W2166559705","https://openalex.org/W2210278139","https://openalex.org/W2463095032","https://openalex.org/W2889395214","https://openalex.org/W4238243588","https://openalex.org/W4241122026","https://openalex.org/W6602383291","https://openalex.org/W6630198464","https://openalex.org/W6640448907","https://openalex.org/W6681242987","https://openalex.org/W6719649792"],"related_works":["https://openalex.org/W4255837520","https://openalex.org/W2373300491","https://openalex.org/W2395294869","https://openalex.org/W2378744544","https://openalex.org/W2387011115","https://openalex.org/W2594301978","https://openalex.org/W2379704676","https://openalex.org/W1998810860","https://openalex.org/W4206442282","https://openalex.org/W2384505857"],"abstract_inverted_index":{"Mining":[0],"large-scale":[1],"text":[2],"corpora":[3],"is":[4],"an":[5],"essential":[6],"step":[7],"in":[8,13,88],"extracting":[9],"the":[10,25,36],"key":[11],"themes":[12],"a":[14,18,79],"corpus.":[15],"We":[16,34,75],"motivate":[17],"quantitative":[19],"measure":[20,86],"for":[21,49],"significant":[22,43],"associations":[23,44],"through":[24],"distributions":[26],"of":[27,31,39,63,82],"pairs":[28],"and":[29,45,71],"triplets":[30],"co-occurring":[32],"words.":[33],"consider":[35],"algorithmic":[37],"problem":[38],"efficiently":[40],"enumerating":[41],"such":[42],"present":[46,76],"pruning":[47],"algorithms":[48,60],"these":[50],"problems,":[51],"with":[52],"theoretical":[53],"as":[54,56],"well":[55],"empirical":[57],"analyses.":[58],"Our":[59],"make":[61],"use":[62],"two":[64],"novel":[65],"mining":[66],"methods:":[67],"(1)":[68],"matrix":[69],"mining,":[70],"(2)":[72],"shortened":[73],"documents.":[74],"evidence":[77],"from":[78],"diverse":[80],"set":[81],"documents":[83],"that":[84],"our":[85],"does":[87],"fact":[89],"elicit":[90],"interesting":[91],"co-occurrences.":[92]},"counts_by_year":[{"year":2013,"cited_by_count":1}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
