{"id":"https://openalex.org/W2043850887","doi":"https://doi.org/10.1002/asi.4630350102","title":"Statistical recognition of content terms in general text","display_name":"Statistical recognition of content terms in general text","publication_year":1984,"publication_date":"1984-01-01","ids":{"openalex":"https://openalex.org/W2043850887","doi":"https://doi.org/10.1002/asi.4630350102","mag":"2043850887"},"language":"en","primary_location":{"id":"doi:10.1002/asi.4630350102","is_oa":false,"landing_page_url":"https://doi.org/10.1002/asi.4630350102","pdf_url":null,"source":{"id":"https://openalex.org/S4210220780","display_name":"Journal of the American Society for Information Science","issn_l":"0002-8231","issn":["0002-8231","1097-4571"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320595","host_organization_name":"Wiley","host_organization_lineage":["https://openalex.org/P4310320595"],"host_organization_lineage_names":["Wiley"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of the American Society for Information Science","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5110356062","display_name":"Mart\u00edn Dillon","orcid":null},"institutions":[{"id":"https://openalex.org/I114027177","display_name":"University of North Carolina at Chapel Hill","ror":"https://ror.org/0130frc33","country_code":"US","type":"education","lineage":["https://openalex.org/I114027177"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Martin Dillon","raw_affiliation_strings":["School of Library Science, University of North Carolina, Chapel Hill, NC 27514"],"affiliations":[{"raw_affiliation_string":"School of Library Science, University of North Carolina, Chapel Hill, NC 27514","institution_ids":["https://openalex.org/I114027177"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5031034434","display_name":"Peggy Federhart","orcid":null},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Peggy Federhart","raw_affiliation_strings":["Library, IBM Corporation, Charlotte, NC 28257"],"affiliations":[{"raw_affiliation_string":"Library, IBM Corporation, Charlotte, NC 28257","institution_ids":["https://openalex.org/I1341412227"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5110356062"],"corresponding_institution_ids":["https://openalex.org/I114027177"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.26026428,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":"35","issue":"1","first_page":"1","last_page":"10"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10286","display_name":"Information Retrieval and Search Behavior","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10286","display_name":"Information Retrieval and Search Behavior","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9790999889373779,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9768000245094299,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/generalizability-theory","display_name":"Generalizability theory","score":0.785011887550354},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.6727780103683472},{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.5964342951774597},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5945261716842651},{"id":"https://openalex.org/keywords/search-engine-indexing","display_name":"Search engine indexing","score":0.5589512586593628},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5357735753059387},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.504196047782898},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.503572404384613},{"id":"https://openalex.org/keywords/linear-discriminant-analysis","display_name":"Linear discriminant analysis","score":0.4756942689418793},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4522748589515686},{"id":"https://openalex.org/keywords/recall","display_name":"Recall","score":0.4484878480434418},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.4418491721153259},{"id":"https://openalex.org/keywords/test","display_name":"Test (biology)","score":0.43579623103141785},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.42698460817337036},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.40204864740371704},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.34554964303970337},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2947085499763489},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.2608690857887268},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.17278099060058594},{"id":"https://openalex.org/keywords/cognitive-psychology","display_name":"Cognitive psychology","score":0.13976183533668518},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.13973066210746765}],"concepts":[{"id":"https://openalex.org/C27158222","wikidata":"https://www.wikidata.org/wiki/Q5532422","display_name":"Generalizability theory","level":2,"score":0.785011887550354},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.6727780103683472},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.5964342951774597},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5945261716842651},{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.5589512586593628},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5357735753059387},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.504196047782898},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.503572404384613},{"id":"https://openalex.org/C69738355","wikidata":"https://www.wikidata.org/wiki/Q1228929","display_name":"Linear discriminant analysis","level":2,"score":0.4756942689418793},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4522748589515686},{"id":"https://openalex.org/C100660578","wikidata":"https://www.wikidata.org/wiki/Q18733","display_name":"Recall","level":2,"score":0.4484878480434418},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.4418491721153259},{"id":"https://openalex.org/C2777267654","wikidata":"https://www.wikidata.org/wiki/Q3519023","display_name":"Test (biology)","level":2,"score":0.43579623103141785},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.42698460817337036},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.40204864740371704},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.34554964303970337},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2947085499763489},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2608690857887268},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.17278099060058594},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.13976183533668518},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.13973066210746765},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1002/asi.4630350102","is_oa":false,"landing_page_url":"https://doi.org/10.1002/asi.4630350102","pdf_url":null,"source":{"id":"https://openalex.org/S4210220780","display_name":"Journal of the American Society for Information Science","issn_l":"0002-8231","issn":["0002-8231","1097-4571"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320595","host_organization_name":"Wiley","host_organization_lineage":["https://openalex.org/P4310320595"],"host_organization_lineage_names":["Wiley"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of the American Society for Information Science","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.75,"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":10,"referenced_works":["https://openalex.org/W104683279","https://openalex.org/W1723574056","https://openalex.org/W1971155308","https://openalex.org/W1998041663","https://openalex.org/W2005095359","https://openalex.org/W2053713852","https://openalex.org/W2091061212","https://openalex.org/W2102539853","https://openalex.org/W2190672796","https://openalex.org/W2319554640"],"related_works":["https://openalex.org/W2118717649","https://openalex.org/W1958015814","https://openalex.org/W410723623","https://openalex.org/W2413243053","https://openalex.org/W2015341305","https://openalex.org/W4225593417","https://openalex.org/W2035068594","https://openalex.org/W2059783833","https://openalex.org/W2573498121","https://openalex.org/W1549477351"],"abstract_inverted_index":{"Abstract":[0],"This":[1],"article":[2],"discusses":[3],"ways":[4],"to":[5,91],"improve":[6],"the":[7,15,31],"quality":[8],"of":[9,17,35,46,56,76,82],"retrieval":[10],"systems":[11],"that":[12,63],"depend":[13],"on":[14],"use":[16],"truncated":[18],"words":[19],"or":[20],"quasi\u2010word":[21],"stems":[22],"as":[23],"an":[24],"indexing":[25],"vocabulary.":[26],"The":[27],"problems":[28],"addressed":[29],"are":[30],"generalizability":[32],"and":[33],"stability":[34],"discriminant":[36],"function":[37],"analysis":[38],"for":[39],"selecting":[40],"good":[41],"topical":[42,64,77],"terms":[43,45,65,78],"from":[44,54],"relatively":[47],"high":[48,74],"frequency":[49],"in":[50,94],"a":[51,80],"database":[52],"drawn":[53],"abstracts":[55],"Harris":[57],"Survey":[58],"press":[59],"releases.":[60],"Results":[61],"confirm":[62],"can":[66],"be":[67],"identified":[68],"by":[69],"their":[70],"statistical":[71],"properties.":[72],"Consistently":[73],"recall":[75],"under":[79],"variety":[81],"different":[83],"conditions":[84],"implies":[85],"persistent":[86],"underlying":[87],"properties":[88],"strong":[89],"enough":[90],"resist":[92],"changes":[93],"test":[95],"environment.":[96]},"counts_by_year":[{"year":2019,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
