{"id":"https://openalex.org/W2105258824","doi":"https://doi.org/10.1109/tkde.2002.1047777","title":"A statistical method for estimating the usefulness of text databases","display_name":"A statistical method for estimating the usefulness of text databases","publication_year":2002,"publication_date":"2002-11-01","ids":{"openalex":"https://openalex.org/W2105258824","doi":"https://doi.org/10.1109/tkde.2002.1047777","mag":"2105258824"},"language":"en","primary_location":{"id":"doi:10.1109/tkde.2002.1047777","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tkde.2002.1047777","pdf_url":null,"source":{"id":"https://openalex.org/S30698027","display_name":"IEEE Transactions on Knowledge and Data Engineering","issn_l":"1041-4347","issn":["1041-4347","1558-2191","2326-3865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Knowledge and Data Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5000305339","display_name":"King-Lup Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I118353179","display_name":"DePaul University","ror":"https://ror.org/04xtx5t16","country_code":"US","type":"education","lineage":["https://openalex.org/I118353179"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"King-Lup Liu","raw_affiliation_strings":["School of Computer Science, Telecommunications, and Information Systems, De Paul University, Chicago, IL, USA"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Telecommunications, and Information Systems, De Paul University, Chicago, IL, USA","institution_ids":["https://openalex.org/I118353179"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109224736","display_name":"C. Yu","orcid":"https://orcid.org/0009-0007-3659-1788"},"institutions":[{"id":"https://openalex.org/I39422238","display_name":"University of Illinois Chicago","ror":"https://ror.org/02mpq6x41","country_code":"US","type":"education","lineage":["https://openalex.org/I39422238"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"C. Yu","raw_affiliation_strings":["Department of Electrical Engineering and Computer Science, University of Illinois, Chicago, Chicago, IL, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering and Computer Science, University of Illinois, Chicago, Chicago, IL, USA","institution_ids":["https://openalex.org/I39422238"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101960531","display_name":"Weiyi Meng","orcid":"https://orcid.org/0000-0002-7246-2058"},"institutions":[{"id":"https://openalex.org/I123946342","display_name":"Binghamton University","ror":"https://ror.org/008rmbt77","country_code":"US","type":"education","lineage":["https://openalex.org/I123946342"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Weiyi Meng","raw_affiliation_strings":["Department of Computer Science, Binghamton University, Binghamton, NY, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Binghamton University, Binghamton, NY, USA","institution_ids":["https://openalex.org/I123946342"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061069727","display_name":"Wensheng Wu","orcid":"https://orcid.org/0000-0002-2948-9773"},"institutions":[{"id":"https://openalex.org/I39422238","display_name":"University of Illinois Chicago","ror":"https://ror.org/02mpq6x41","country_code":"US","type":"education","lineage":["https://openalex.org/I39422238"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Wensheng Wu","raw_affiliation_strings":["Department of Electrical Engineering and Computer Science, University of Illinois, Chicago, Chicago, IL, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering and Computer Science, University of Illinois, Chicago, Chicago, IL, USA","institution_ids":["https://openalex.org/I39422238"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5109242911","display_name":"Naphtali Rishe","orcid":null},"institutions":[{"id":"https://openalex.org/I19700959","display_name":"Florida International University","ror":"https://ror.org/02gz6gg07","country_code":"US","type":"education","lineage":["https://openalex.org/I19700959"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"N. Rishe","raw_affiliation_strings":["School of Computer Science, Florida International University, Miami, FL, USA"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Florida International University, Miami, FL, USA","institution_ids":["https://openalex.org/I19700959"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5000305339"],"corresponding_institution_ids":["https://openalex.org/I118353179"],"apc_list":null,"apc_paid":null,"fwci":3.178,"has_fulltext":false,"cited_by_count":39,"citation_normalized_percentile":{"value":0.92716452,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":"14","issue":"6","first_page":"1422","last_page":"1437"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9958999752998352,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8350882530212402},{"id":"https://openalex.org/keywords/search-engine","display_name":"Search engine","score":0.8018520474433899},{"id":"https://openalex.org/keywords/web-query-classification","display_name":"Web query classification","score":0.7378321886062622},{"id":"https://openalex.org/keywords/web-search-query","display_name":"Web search query","score":0.720805287361145},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.6763045191764832},{"id":"https://openalex.org/keywords/search-oriented-architecture","display_name":"Search-oriented architecture","score":0.6740982532501221},{"id":"https://openalex.org/keywords/query-expansion","display_name":"Query expansion","score":0.657518208026886},{"id":"https://openalex.org/keywords/query-optimization","display_name":"Query optimization","score":0.6059602499008179},{"id":"https://openalex.org/keywords/metasearch-engine","display_name":"Metasearch engine","score":0.6022952795028687},{"id":"https://openalex.org/keywords/the-internet","display_name":"The Internet","score":0.5902806520462036},{"id":"https://openalex.org/keywords/sargable","display_name":"Sargable","score":0.5623224973678589},{"id":"https://openalex.org/keywords/search-analytics","display_name":"Search analytics","score":0.535548210144043},{"id":"https://openalex.org/keywords/query-language","display_name":"Query language","score":0.5127888321876526},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.47327518463134766},{"id":"https://openalex.org/keywords/ranking","display_name":"Ranking (information retrieval)","score":0.46452391147613525},{"id":"https://openalex.org/keywords/search-engine-indexing","display_name":"Search engine indexing","score":0.43526989221572876},{"id":"https://openalex.org/keywords/phrase-search","display_name":"Phrase search","score":0.4273388385772705},{"id":"https://openalex.org/keywords/interface","display_name":"Interface (matter)","score":0.4138641357421875},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.17448031902313232}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8350882530212402},{"id":"https://openalex.org/C97854310","wikidata":"https://www.wikidata.org/wiki/Q19541","display_name":"Search engine","level":2,"score":0.8018520474433899},{"id":"https://openalex.org/C118689300","wikidata":"https://www.wikidata.org/wiki/Q7978614","display_name":"Web query classification","level":4,"score":0.7378321886062622},{"id":"https://openalex.org/C164120249","wikidata":"https://www.wikidata.org/wiki/Q995982","display_name":"Web search query","level":3,"score":0.720805287361145},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.6763045191764832},{"id":"https://openalex.org/C157154645","wikidata":"https://www.wikidata.org/wiki/Q7441612","display_name":"Search-oriented architecture","level":5,"score":0.6740982532501221},{"id":"https://openalex.org/C99016210","wikidata":"https://www.wikidata.org/wiki/Q5488129","display_name":"Query expansion","level":2,"score":0.657518208026886},{"id":"https://openalex.org/C157692150","wikidata":"https://www.wikidata.org/wiki/Q2919848","display_name":"Query optimization","level":2,"score":0.6059602499008179},{"id":"https://openalex.org/C173979980","wikidata":"https://www.wikidata.org/wiki/Q114106","display_name":"Metasearch engine","level":4,"score":0.6022952795028687},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.5902806520462036},{"id":"https://openalex.org/C192939062","wikidata":"https://www.wikidata.org/wiki/Q104840822","display_name":"Sargable","level":4,"score":0.5623224973678589},{"id":"https://openalex.org/C14838553","wikidata":"https://www.wikidata.org/wiki/Q7441639","display_name":"Search analytics","level":4,"score":0.535548210144043},{"id":"https://openalex.org/C192028432","wikidata":"https://www.wikidata.org/wiki/Q845739","display_name":"Query language","level":2,"score":0.5127888321876526},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.47327518463134766},{"id":"https://openalex.org/C189430467","wikidata":"https://www.wikidata.org/wiki/Q7293293","display_name":"Ranking (information retrieval)","level":2,"score":0.46452391147613525},{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.43526989221572876},{"id":"https://openalex.org/C37202355","wikidata":"https://www.wikidata.org/wiki/Q7188071","display_name":"Phrase search","level":5,"score":0.4273388385772705},{"id":"https://openalex.org/C113843644","wikidata":"https://www.wikidata.org/wiki/Q901882","display_name":"Interface (matter)","level":4,"score":0.4138641357421875},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.17448031902313232},{"id":"https://openalex.org/C129307140","wikidata":"https://www.wikidata.org/wiki/Q6795880","display_name":"Maximum bubble pressure method","level":3,"score":0.0},{"id":"https://openalex.org/C157915830","wikidata":"https://www.wikidata.org/wiki/Q2928001","display_name":"Bubble","level":2,"score":0.0},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/tkde.2002.1047777","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tkde.2002.1047777","pdf_url":null,"source":{"id":"https://openalex.org/S30698027","display_name":"IEEE Transactions on Knowledge and Data Engineering","issn_l":"1041-4347","issn":["1041-4347","1558-2191","2326-3865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Knowledge and Data Engineering","raw_type":"journal-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.21.2535","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.21.2535","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.depaul.edu/~kliu/pub.d/tkde.ps","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.44.2507","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.44.2507","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://panda.cs.binghamton.edu/~meng/pub.d/tkde00.ps.gz","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":40,"referenced_works":["https://openalex.org/W21247300","https://openalex.org/W36509427","https://openalex.org/W52490583","https://openalex.org/W1502904630","https://openalex.org/W1523307303","https://openalex.org/W1564059483","https://openalex.org/W1610765524","https://openalex.org/W1721568912","https://openalex.org/W1952843833","https://openalex.org/W1956218947","https://openalex.org/W1956559956","https://openalex.org/W1964653195","https://openalex.org/W1974147361","https://openalex.org/W2000569744","https://openalex.org/W2001703546","https://openalex.org/W2012893546","https://openalex.org/W2023657004","https://openalex.org/W2025288694","https://openalex.org/W2032459394","https://openalex.org/W2073853190","https://openalex.org/W2086253379","https://openalex.org/W2137845970","https://openalex.org/W2147164982","https://openalex.org/W2152057064","https://openalex.org/W2169044456","https://openalex.org/W2325227998","https://openalex.org/W4210650935","https://openalex.org/W4213212078","https://openalex.org/W4230326608","https://openalex.org/W4234200495","https://openalex.org/W4241808895","https://openalex.org/W4247346926","https://openalex.org/W4255459561","https://openalex.org/W6600828572","https://openalex.org/W6601427904","https://openalex.org/W6602052844","https://openalex.org/W6633402523","https://openalex.org/W6636659669","https://openalex.org/W6684703521","https://openalex.org/W7045769202"],"related_works":["https://openalex.org/W2885550273","https://openalex.org/W2132107051","https://openalex.org/W4206776910","https://openalex.org/W2374021970","https://openalex.org/W2223855511","https://openalex.org/W4233958997","https://openalex.org/W2471246956","https://openalex.org/W2105258824","https://openalex.org/W2765856158","https://openalex.org/W2163291723"],"abstract_inverted_index":{"Searching":[0],"desired":[1],"data":[2,26],"on":[3,27],"the":[4,9,13,28,46,53,59,83,108,129,143,159,164,172,175],"Internet":[5,14],"is":[6,15,21,61,70,152,188],"one":[7],"of":[8,23,55,89,110,131,145,158,161,178],"most":[10],"common":[11],"ways":[12],"used.":[16],"No":[17],"single":[18],"search":[19,39,56,65,91,97,112,133,147,165],"engine":[20,113,134,148,166],"capable":[22],"searching":[24,94],"all":[25,64],"Internet.":[29],"The":[30,102],"approach":[31],"that":[32,167,184],"provides":[33],"an":[34],"interface":[35,60],"for":[36,41,67,135],"invoking":[37,63],"multiple":[38],"engines":[40,57,66,92,98],"each":[42,68,117],"user":[43],"query":[44,69,84,118,173],"has":[45],"potential":[47],"to":[48,85,116,127,154,171],"satisfy":[49],"more":[50,190],"users.":[51],"When":[52],"number":[54,88,160],"under":[58],"large,":[62],"often":[71],"not":[72],"cost":[73],"effective":[74],"because":[75],"it":[76],"creates":[77],"unnecessary":[78],"network":[79],"traffic":[80],"by":[81],"sending":[82],"a":[86,124,132,140,146,156],"large":[87],"useless":[90,96],"and":[93,174],"these":[95,179],"wastes":[99],"local":[100],"resources.":[101],"problem":[103],"can":[104,119],"be":[105,120,155],"overcome":[106],"if":[107],"usefulness":[109,130,144],"every":[111],"with":[114],"respect":[115],"predicted.":[121],"We":[122],"present":[123],"statistical":[125],"method":[126,187],"estimate":[128],"any":[136],"given":[137,141],"query.":[138],"For":[139],"query,":[142],"in":[149,163],"this":[150],"paper":[151],"defined":[153],"combination":[157],"documents":[162],"are":[168],"sufficiently":[169],"similar":[170],"average":[176],"similarity":[177],"documents.":[180],"Experimental":[181],"results":[182],"indicate":[183],"our":[185],"estimation":[186],"much":[189],"accurate":[191],"than":[192],"existing":[193],"methods.":[194]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2015,"cited_by_count":2},{"year":2013,"cited_by_count":3}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
