{"id":"https://openalex.org/W2080068076","doi":"https://doi.org/10.1002/asi.4630260402","title":"A probabilistic approach to automatic keyword indexing. Part I. On the Distribution of Specialty Words in a Technical Literature","display_name":"A probabilistic approach to automatic keyword indexing. Part I. On the Distribution of Specialty Words in a Technical Literature","publication_year":1975,"publication_date":"1975-07-01","ids":{"openalex":"https://openalex.org/W2080068076","doi":"https://doi.org/10.1002/asi.4630260402","mag":"2080068076"},"language":"en","primary_location":{"id":"doi:10.1002/asi.4630260402","is_oa":false,"landing_page_url":"https://doi.org/10.1002/asi.4630260402","pdf_url":null,"source":{"id":"https://openalex.org/S4210220780","display_name":"Journal of the American Society for Information Science","issn_l":"0002-8231","issn":["0002-8231","1097-4571"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320595","host_organization_name":"Wiley","host_organization_lineage":["https://openalex.org/P4310320595"],"host_organization_lineage_names":["Wiley"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of the American Society for Information Science","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5056752791","display_name":"Stephen P. Harter","orcid":null},"institutions":[{"id":"https://openalex.org/I2613432","display_name":"University of South Florida","ror":"https://ror.org/032db5x82","country_code":"US","type":"education","lineage":["https://openalex.org/I2613432"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Stephen P. Harter","raw_affiliation_strings":["Library Science/Audiovisual Program (FAO 186) University of South Florida Tampa, Florida 33620"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Library Science/Audiovisual Program (FAO 186) University of South Florida Tampa, Florida 33620","institution_ids":["https://openalex.org/I2613432"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5056752791"],"corresponding_institution_ids":["https://openalex.org/I2613432"],"apc_list":null,"apc_paid":null,"fwci":5.451,"has_fulltext":false,"cited_by_count":198,"citation_normalized_percentile":{"value":0.95311279,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"26","issue":"4","first_page":"197","last_page":"206"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9751999974250793,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9728000164031982,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/poisson-distribution","display_name":"Poisson distribution","score":0.7585189342498779},{"id":"https://openalex.org/keywords/search-engine-indexing","display_name":"Search engine indexing","score":0.6839837431907654},{"id":"https://openalex.org/keywords/specialty","display_name":"Specialty","score":0.645257830619812},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.6190487742424011},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6095754504203796},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5575204491615295},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.48242267966270447},{"id":"https://openalex.org/keywords/statistical-model","display_name":"Statistical model","score":0.4532306492328644},{"id":"https://openalex.org/keywords/index","display_name":"Index (typography)","score":0.45022648572921753},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.4393329322338104},{"id":"https://openalex.org/keywords/contrast","display_name":"Contrast (vision)","score":0.4273712933063507},{"id":"https://openalex.org/keywords/probability-distribution","display_name":"Probability distribution","score":0.42242273688316345},{"id":"https://openalex.org/keywords/distribution","display_name":"Distribution (mathematics)","score":0.4183717668056488},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.39300844073295593},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.3888623118400574},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3327330946922302},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.3065846264362335},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.14300256967544556},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.1160484254360199}],"concepts":[{"id":"https://openalex.org/C100906024","wikidata":"https://www.wikidata.org/wiki/Q205692","display_name":"Poisson distribution","level":2,"score":0.7585189342498779},{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.6839837431907654},{"id":"https://openalex.org/C20387591","wikidata":"https://www.wikidata.org/wiki/Q930752","display_name":"Specialty","level":2,"score":0.645257830619812},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.6190487742424011},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6095754504203796},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5575204491615295},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.48242267966270447},{"id":"https://openalex.org/C114289077","wikidata":"https://www.wikidata.org/wiki/Q3284399","display_name":"Statistical model","level":2,"score":0.4532306492328644},{"id":"https://openalex.org/C2777382242","wikidata":"https://www.wikidata.org/wiki/Q6017816","display_name":"Index (typography)","level":2,"score":0.45022648572921753},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.4393329322338104},{"id":"https://openalex.org/C2776502983","wikidata":"https://www.wikidata.org/wiki/Q690182","display_name":"Contrast (vision)","level":2,"score":0.4273712933063507},{"id":"https://openalex.org/C149441793","wikidata":"https://www.wikidata.org/wiki/Q200726","display_name":"Probability distribution","level":2,"score":0.42242273688316345},{"id":"https://openalex.org/C110121322","wikidata":"https://www.wikidata.org/wiki/Q865811","display_name":"Distribution (mathematics)","level":2,"score":0.4183717668056488},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.39300844073295593},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.3888623118400574},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3327330946922302},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.3065846264362335},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.14300256967544556},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.1160484254360199},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C118552586","wikidata":"https://www.wikidata.org/wiki/Q7867","display_name":"Psychiatry","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1002/asi.4630260402","is_oa":false,"landing_page_url":"https://doi.org/10.1002/asi.4630260402","pdf_url":null,"source":{"id":"https://openalex.org/S4210220780","display_name":"Journal of the American Society for Information Science","issn_l":"0002-8231","issn":["0002-8231","1097-4571"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320595","host_organization_name":"Wiley","host_organization_lineage":["https://openalex.org/P4310320595"],"host_organization_lineage_names":["Wiley"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of the American Society for Information Science","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":12,"referenced_works":["https://openalex.org/W639512594","https://openalex.org/W1499624045","https://openalex.org/W1511258569","https://openalex.org/W1528917427","https://openalex.org/W1982442952","https://openalex.org/W1986305179","https://openalex.org/W2009190245","https://openalex.org/W2034701578","https://openalex.org/W2078857984","https://openalex.org/W2801840425","https://openalex.org/W3015934416","https://openalex.org/W4300038597"],"related_works":["https://openalex.org/W2000811477","https://openalex.org/W2365653098","https://openalex.org/W3045293259","https://openalex.org/W4229884329","https://openalex.org/W3032220911","https://openalex.org/W2349001406","https://openalex.org/W2364829248","https://openalex.org/W2385612156","https://openalex.org/W2382841014","https://openalex.org/W2386244707"],"abstract_inverted_index":{"Abstract":[0],"The":[1,37,120],"problem":[2],"studied":[3],"in":[4,67,94,112],"this":[5],"research":[6,38],"is":[7,92,125,155],"that":[8,35,50],"of":[9,13,20,24,48,70,83,88,99,109,114,122,140],"developing":[10],"a":[11,25,46,68,86,97],"set":[12],"formal":[14],"statistical":[15],"rules":[16],"for":[17,34,58,137],"the":[18,22,42,84,106,110,123,138,141,152],"purpose":[19],"identifying":[21],"keywords":[23],"document\u2010words":[26],"likely":[27],"to":[28,62,134,146],"be":[29,63,135],"useful":[30],"as":[31,96],"index":[32],"terms":[33,113],"document.":[36],"was":[39],"prompted":[40],"by":[41,45],"observation,":[43],"made":[44],"number":[47],"writers,":[49],"non\u2010specialty":[51],"words,":[52,149],"words":[53,75],"which":[54],"possess":[55],"little":[56],"value":[57],"indexing":[59],"purposes,":[60],"tend":[61],"distributed":[64],"at":[65],"random":[66],"collection":[69,131],"documents.":[71],"In":[72,80],"contrast,":[73],"specialty":[74,100,148],"are":[76,118],"not":[77],"so":[78],"distributed.":[79],"Part":[81],"I":[82],"study,":[85],"mixture":[87],"two":[89],"Poisson":[90],"distributions":[91],"examined":[93],"detail":[95],"model":[98,111,124],"word":[101],"distribution,":[102],"and":[103,132,157],"formulas":[104],"expressing":[105],"three":[107],"parameters":[108],"empirical":[115],"frequency":[116],"statistics":[117],"derived.":[119],"fit":[121],"tested":[126],"on":[127],"an":[128],"experimental":[129],"document":[130],"found":[133],"acceptable":[136],"purposes":[139],"study.":[142],"A":[143],"measure":[144],"intended":[145],"identify":[147],"consistent":[150],"with":[151],"2\u2010Poisson":[153],"model,":[154],"proposed":[156],"evaluated.":[158]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":5},{"year":2017,"cited_by_count":5},{"year":2016,"cited_by_count":6},{"year":2015,"cited_by_count":7},{"year":2014,"cited_by_count":5},{"year":2013,"cited_by_count":8},{"year":2012,"cited_by_count":6}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
