{"id":"https://openalex.org/W1982017033","doi":"https://doi.org/10.1145/2063576.2063630","title":"Implementation techniques for large-scale latent semantic indexing applications","display_name":"Implementation techniques for large-scale latent semantic indexing applications","publication_year":2011,"publication_date":"2011-10-24","ids":{"openalex":"https://openalex.org/W1982017033","doi":"https://doi.org/10.1145/2063576.2063630","mag":"1982017033"},"language":"en","primary_location":{"id":"doi:10.1145/2063576.2063630","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2063576.2063630","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 20th ACM international conference on Information and knowledge management","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5088893823","display_name":"Roger Bradford","orcid":"https://orcid.org/0000-0003-1750-3125"},"institutions":[{"id":"https://openalex.org/I138285227","display_name":"Agilent Technologies (United States)","ror":"https://ror.org/02tryst02","country_code":"US","type":"company","lineage":["https://openalex.org/I138285227"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Roger B. Bradford","raw_affiliation_strings":["Agilex Technologies Inc., Chantilly, VA, USA"],"affiliations":[{"raw_affiliation_string":"Agilex Technologies Inc., Chantilly, VA, USA","institution_ids":["https://openalex.org/I138285227"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5088893823"],"corresponding_institution_ids":["https://openalex.org/I138285227"],"apc_list":null,"apc_paid":null,"fwci":0.6125,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.64262426,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"339","last_page":"344"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8086233139038086},{"id":"https://openalex.org/keywords/singular-value-decomposition","display_name":"Singular value decomposition","score":0.7950892448425293},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.7667934894561768},{"id":"https://openalex.org/keywords/search-engine-indexing","display_name":"Search engine indexing","score":0.669819712638855},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.5257681012153625},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.5168031454086304},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5061697363853455},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5000309944152832},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.49213093519210815},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.4422680735588074},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.36176827549934387},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.2727808356285095},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.17829740047454834},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.14645269513130188},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.09999260306358337}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8086233139038086},{"id":"https://openalex.org/C22789450","wikidata":"https://www.wikidata.org/wiki/Q420904","display_name":"Singular value decomposition","level":2,"score":0.7950892448425293},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.7667934894561768},{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.669819712638855},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.5257681012153625},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.5168031454086304},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5061697363853455},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5000309944152832},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.49213093519210815},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.4422680735588074},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.36176827549934387},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.2727808356285095},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.17829740047454834},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.14645269513130188},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.09999260306358337},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2063576.2063630","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2063576.2063630","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 20th ACM international conference on Information and knowledge management","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.44999998807907104,"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":12,"referenced_works":["https://openalex.org/W82845726","https://openalex.org/W1589050831","https://openalex.org/W1899156745","https://openalex.org/W2045517286","https://openalex.org/W2057870902","https://openalex.org/W2061459389","https://openalex.org/W2118868445","https://openalex.org/W2147152072","https://openalex.org/W2152311353","https://openalex.org/W2397021650","https://openalex.org/W2556575288","https://openalex.org/W2734583084"],"related_works":["https://openalex.org/W4244478748","https://openalex.org/W3024364549","https://openalex.org/W4206019083","https://openalex.org/W2048865712","https://openalex.org/W4223488648","https://openalex.org/W1976265003","https://openalex.org/W2370378377","https://openalex.org/W2134969820","https://openalex.org/W2251605416","https://openalex.org/W2560439919"],"abstract_inverted_index":{"The":[0],"technique":[1],"of":[2,22,29,47,51,122,138,149,155,189,199,215,237],"latent":[3],"semantic":[4],"indexing":[5],"(LSI)":[6],"has":[7,32,80],"wide":[8],"applicability":[9],"in":[10,62,141,182,186,201,226,246],"information":[11],"retrieval":[12],"and":[13,38,41,65],"data":[14,111],"mining":[15],"tasks.":[16],"To":[17],"date,":[18],"however,":[19],"most":[20,142],"applications":[21],"LSI":[23,72,78,108,114,172,183,200,221,243],"have":[24,68],"addressed":[25],"relatively":[26],"small":[27],"collections":[28,121],"data.":[30],"This":[31,98,231],"been":[33],"due":[34],"partly":[35,42],"to":[36,43,96,103,124,133,135,169,178,211],"hardware":[37,63],"software":[39,66],"limitations":[40],"overly":[44],"pessimistic":[45],"estimates":[46],"the":[48,52,83,87,113,136,144,150,156,161,165,175,180,187,234,238],"processing":[49,207],"requirements":[50],"singular":[53,151],"value":[54,152],"decomposition":[55,153],"(SVD)":[56,154],"process.":[57],"In":[58],"recent":[59],"years,":[60],"advances":[61],"capabilities":[64],"implementations":[67],"enabled":[69],"much":[70],"larger":[71],"applications.":[73],"Moreover,":[74],"experience":[75],"with":[76],"large":[77,213],"indexes":[79],"shown":[81,132,219],"that":[82,91,220],"SVD":[84,181],"is":[85,118,159,184,218,222],"not":[86,160],"limitation":[88],"on":[89],"scalability":[90,137],"it":[92],"was":[93],"long":[94],"thought":[95],"be.":[97],"paper":[99,232],"describes":[100],"techniques":[101],"applicable":[102],"creating":[104],"large-scale":[105,242],"(multi-million":[106],"document)":[107],"indexes.":[109],"Detailed":[110],"regarding":[112],"index":[115,194],"creation":[116,195],"process":[117],"presented":[119],"for":[120,147,224,241],"up":[123],"100":[125],"million":[126],"documents.":[127],"Four":[128],"key":[129],"factors":[130],"are":[131],"contribute":[134],"LSI.":[139],"First,":[140],"situations,":[143],"time":[145,167,176,240],"required":[146,168,177],"calculation":[148],"term-document":[157],"matrix":[158],"dominant":[162],"factor":[163],"determining":[164],"overall":[166],"build":[170,244],"an":[171],"index.":[173],"Second,":[174],"calculate":[179],"linear":[185],"number":[188],"objects":[190],"indexed.":[191],"Third,":[192],"incremental":[193],"greatly":[196],"facilitates":[197],"use":[198],"dynamic":[202],"environments.":[203,230],"Fourth,":[204],"distributed":[205,228],"query":[206],"can":[208],"be":[209],"employed":[210],"support":[212],"numbers":[214],"users.":[216],"It":[217],"well-suited":[223],"implementation":[225],"modern":[227],"computing":[229],"provides":[233],"first":[235],"measurements":[236],"execution":[239],"processes":[245],"a":[247],"cloud":[248],"environment.":[249]},"counts_by_year":[{"year":2021,"cited_by_count":1},{"year":2016,"cited_by_count":1},{"year":2015,"cited_by_count":2},{"year":2013,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
