{"id":"https://openalex.org/W2067824637","doi":"https://doi.org/10.1080/09296170802514153","title":"Internet Search Result Probabilities: Heaps' Law and Word Associativity*","display_name":"Internet Search Result Probabilities: Heaps' Law and Word Associativity*","publication_year":2009,"publication_date":"2009-02-01","ids":{"openalex":"https://openalex.org/W2067824637","doi":"https://doi.org/10.1080/09296170802514153","mag":"2067824637"},"language":"en","primary_location":{"id":"doi:10.1080/09296170802514153","is_oa":false,"landing_page_url":"https://doi.org/10.1080/09296170802514153","pdf_url":null,"source":{"id":"https://openalex.org/S24321443","display_name":"Journal of Quantitative Linguistics","issn_l":"0929-6174","issn":["0929-6174","1744-5035"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319847","host_organization_name":"Routledge","host_organization_lineage":["https://openalex.org/P4310319847"],"host_organization_lineage_names":["Routledge"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Quantitative Linguistics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5024676984","display_name":"Jonathan C. Lansey","orcid":null},"institutions":[{"id":"https://openalex.org/I111088046","display_name":"Boston University","ror":"https://ror.org/05qwgg493","country_code":"US","type":"education","lineage":["https://openalex.org/I111088046"]},{"id":"https://openalex.org/I118118575","display_name":"New Jersey Institute of Technology","ror":"https://ror.org/05e74xb87","country_code":"US","type":"education","lineage":["https://openalex.org/I118118575"]},{"id":"https://openalex.org/I4210159958","display_name":"Cognitive Research (United States)","ror":"https://ror.org/04s361q55","country_code":"US","type":"company","lineage":["https://openalex.org/I4210159958"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Jonathan C. Lansey","raw_affiliation_strings":["Department of Mathematical Sciences , New Jersey Institute of Technology ,  USA ;     Cognitive and Neural Systems Department , Boston University ,  USA","Department of Mathematical Sciences , New Jersey Institute of Technology ,  USA","Cognitive and Neural Systems Department , Boston University ,  USA"],"affiliations":[{"raw_affiliation_string":"Department of Mathematical Sciences , New Jersey Institute of Technology ,  USA ;     Cognitive and Neural Systems Department , Boston University ,  USA","institution_ids":["https://openalex.org/I4210159958"]},{"raw_affiliation_string":"Department of Mathematical Sciences , New Jersey Institute of Technology ,  USA","institution_ids":["https://openalex.org/I118118575"]},{"raw_affiliation_string":"Cognitive and Neural Systems Department , Boston University ,  USA","institution_ids":["https://openalex.org/I111088046"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5041949430","display_name":"Bruce Bukiet","orcid":null},"institutions":[{"id":"https://openalex.org/I118118575","display_name":"New Jersey Institute of Technology","ror":"https://ror.org/05e74xb87","country_code":"US","type":"education","lineage":["https://openalex.org/I118118575"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Bruce Bukiet","raw_affiliation_strings":["Department of Mathematical Sciences , New Jersey Institute of Technology ,  USA"],"affiliations":[{"raw_affiliation_string":"Department of Mathematical Sciences , New Jersey Institute of Technology ,  USA","institution_ids":["https://openalex.org/I118118575"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5024676984"],"corresponding_institution_ids":["https://openalex.org/I111088046","https://openalex.org/I118118575","https://openalex.org/I4210159958"],"apc_list":null,"apc_paid":null,"fwci":0.9616,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.76360273,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":"16","issue":"1","first_page":"40","last_page":"66"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10064","display_name":"Complex Network Analysis Techniques","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10064","display_name":"Complex Network Analysis Techniques","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":0.9891999959945679,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/zipfs-law","display_name":"Zipf's law","score":0.9588347673416138},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.7290886640548706},{"id":"https://openalex.org/keywords/the-internet","display_name":"The Internet","score":0.6598284840583801},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6558710932731628},{"id":"https://openalex.org/keywords/word-lists-by-frequency","display_name":"Word lists by frequency","score":0.5211113095283508},{"id":"https://openalex.org/keywords/index","display_name":"Index (typography)","score":0.5021119117736816},{"id":"https://openalex.org/keywords/value","display_name":"Value (mathematics)","score":0.471381276845932},{"id":"https://openalex.org/keywords/associative-property","display_name":"Associative property","score":0.4403931200504303},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4321558475494385},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.29061365127563477},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.28997671604156494},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.24483877420425415},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.21731597185134888}],"concepts":[{"id":"https://openalex.org/C125932096","wikidata":"https://www.wikidata.org/wiki/Q205472","display_name":"Zipf's law","level":2,"score":0.9588347673416138},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.7290886640548706},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.6598284840583801},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6558710932731628},{"id":"https://openalex.org/C175293574","wikidata":"https://www.wikidata.org/wiki/Q697133","display_name":"Word lists by frequency","level":3,"score":0.5211113095283508},{"id":"https://openalex.org/C2777382242","wikidata":"https://www.wikidata.org/wiki/Q6017816","display_name":"Index (typography)","level":2,"score":0.5021119117736816},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.471381276845932},{"id":"https://openalex.org/C159423971","wikidata":"https://www.wikidata.org/wiki/Q177251","display_name":"Associative property","level":2,"score":0.4403931200504303},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4321558475494385},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.29061365127563477},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.28997671604156494},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.24483877420425415},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.21731597185134888},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.0},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1080/09296170802514153","is_oa":false,"landing_page_url":"https://doi.org/10.1080/09296170802514153","pdf_url":null,"source":{"id":"https://openalex.org/S24321443","display_name":"Journal of Quantitative Linguistics","issn_l":"0929-6174","issn":["0929-6174","1744-5035"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319847","host_organization_name":"Routledge","host_organization_lineage":["https://openalex.org/P4310319847"],"host_organization_lineage_names":["Routledge"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Quantitative Linguistics","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.7799999713897705,"display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":13,"referenced_works":["https://openalex.org/W611900225","https://openalex.org/W1517590677","https://openalex.org/W1605217017","https://openalex.org/W1976969221","https://openalex.org/W2011688587","https://openalex.org/W2037959956","https://openalex.org/W2066636486","https://openalex.org/W2074389198","https://openalex.org/W2077054557","https://openalex.org/W2146223937","https://openalex.org/W2997134027","https://openalex.org/W4252627275","https://openalex.org/W4301441647"],"related_works":["https://openalex.org/W2075174955","https://openalex.org/W1928085876","https://openalex.org/W1588516692","https://openalex.org/W2127709934","https://openalex.org/W2048176942","https://openalex.org/W2964245942","https://openalex.org/W4386816323","https://openalex.org/W39918333","https://openalex.org/W1507719567","https://openalex.org/W1724948991"],"abstract_inverted_index":{"Abstract":[0],"We":[1,27,122,149,167],"study":[2],"the":[3,15,40,51,56,62,98,103,127,135,151,174],"number":[4,16,42],"of":[5,17,43,137,147,153,177],"internet":[6,63],"search":[7,33,179],"results":[8,18],"returned":[9,19],"from":[10,74],"multi-word":[11,37],"queries":[12,38],"based":[13],"on":[14,61,72],"when":[20,83],"each":[21,78,141],"word":[22,69,76,138,157],"is":[23],"searched":[24,84],"for":[25,36,67,85,102],"individually.":[26],"derive":[28],"a":[29,88,142],"model":[30,128,171],"to":[31,55,111,132,172],"describe":[32],"result":[34],"values":[35],"using":[39],"total":[41],"pages":[44],"indexed":[45,104],"by":[46,49],"Google":[47],"and":[48,64,87,159,182],"applying":[50],"Zipf":[52,89],"power":[53],"law":[54,66,90,100],"words":[57],"per":[58],"page":[59],"distribution":[60],"Heaps'":[65,99],"unique":[68],"counts.":[70],"Based":[71],"data":[73],"351":[75],"pairs":[77,139],"with":[79,156],"exactly":[80],"one":[81],"hit":[82],"together,":[86],"coefficient":[91,101],"determined":[92],"in":[93],"other":[94],"studies,":[95],"we":[96,144],"approximate":[97],"worldwide":[105],"web":[106],"(about":[107],"8":[108,164],"billion":[109],"pages)":[110],"be":[112,130],"\u03b2":[113],"=":[114],"0.52.":[115],"Previous":[116],"studies":[117],"used":[118,131],"under":[119],"20,000":[120],"pages.":[121],"demonstrate":[123,150],"through":[124,160],"examples":[125],"how":[126],"can":[129],"analyse":[133],"automatically":[134],"relatedness":[136],"assigning":[140],"value":[143],"call":[145],"\u201cstrength":[146],"associativity\u201d.":[148],"validity":[152],"our":[154,170],"method":[155],"triplets":[158],"two":[161],"experiments":[162],"conducted":[163],"months":[165],"apart.":[166],"then":[168],"use":[169],"compare":[173],"index":[175],"sizes":[176],"competing":[178],"giants":[180],"Yahoo":[181],"Google.":[183]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2016,"cited_by_count":2},{"year":2013,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
