{"id":"https://openalex.org/W2146350580","doi":"https://doi.org/10.1145/2499178.2499189","title":"Efficient Nearest-Neighbor Search in the Probability Simplex","display_name":"Efficient Nearest-Neighbor Search in the Probability Simplex","publication_year":2013,"publication_date":"2013-09-29","ids":{"openalex":"https://openalex.org/W2146350580","doi":"https://doi.org/10.1145/2499178.2499189","mag":"2146350580"},"language":"en","primary_location":{"id":"doi:10.1145/2499178.2499189","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2499178.2499189","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2013 Conference on the Theory of Information Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5008387715","display_name":"Kriste Krstovski","orcid":null},"institutions":[{"id":"https://openalex.org/I24603500","display_name":"University of Massachusetts Amherst","ror":"https://ror.org/0072zz521","country_code":"US","type":"education","lineage":["https://openalex.org/I24603500"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Kriste Krstovski","raw_affiliation_strings":["School of Computer Science, University of Massachusetts, Amherst, MA, 01003, U.S.A","School of Computer Science, University of Massachusetts, Amherst, MA, 01003, U.S.A.#TAB#"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, University of Massachusetts, Amherst, MA, 01003, U.S.A","institution_ids":[]},{"raw_affiliation_string":"School of Computer Science, University of Massachusetts, Amherst, MA, 01003, U.S.A.#TAB#","institution_ids":["https://openalex.org/I24603500"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101722493","display_name":"David A. Smith","orcid":"https://orcid.org/0000-0002-6636-6940"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"David A. Smith","raw_affiliation_strings":["College of Computer and Information Science, Northeastern University, Boston, MA, 02115, U.S.A"],"affiliations":[{"raw_affiliation_string":"College of Computer and Information Science, Northeastern University, Boston, MA, 02115, U.S.A","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046348432","display_name":"Hanna Wallach","orcid":"https://orcid.org/0000-0003-3395-7186"},"institutions":[{"id":"https://openalex.org/I24603500","display_name":"University of Massachusetts Amherst","ror":"https://ror.org/0072zz521","country_code":"US","type":"education","lineage":["https://openalex.org/I24603500"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hanna M. Wallach","raw_affiliation_strings":["School of Computer Science, University of Massachusetts, Amherst, MA, 01003, U.S.A","School of Computer Science, University of Massachusetts, Amherst, MA, 01003, U.S.A.#TAB#"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, University of Massachusetts, Amherst, MA, 01003, U.S.A","institution_ids":[]},{"raw_affiliation_string":"School of Computer Science, University of Massachusetts, Amherst, MA, 01003, U.S.A.#TAB#","institution_ids":["https://openalex.org/I24603500"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101979548","display_name":"Andrew McGregor","orcid":"https://orcid.org/0000-0002-2124-160X"},"institutions":[{"id":"https://openalex.org/I24603500","display_name":"University of Massachusetts Amherst","ror":"https://ror.org/0072zz521","country_code":"US","type":"education","lineage":["https://openalex.org/I24603500"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Andrew McGregor","raw_affiliation_strings":["School of Computer Science, University of Massachusetts, Amherst, MA, 01003, U.S.A","School of Computer Science, University of Massachusetts, Amherst, MA, 01003, U.S.A.#TAB#"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, University of Massachusetts, Amherst, MA, 01003, U.S.A","institution_ids":[]},{"raw_affiliation_string":"School of Computer Science, University of Massachusetts, Amherst, MA, 01003, U.S.A.#TAB#","institution_ids":["https://openalex.org/I24603500"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5008387715"],"corresponding_institution_ids":["https://openalex.org/I24603500"],"apc_list":null,"apc_paid":null,"fwci":1.6586,"has_fulltext":false,"cited_by_count":14,"citation_normalized_percentile":{"value":0.86916615,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"101","last_page":"108"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/locality-sensitive-hashing","display_name":"Locality-sensitive hashing","score":0.8334452509880066},{"id":"https://openalex.org/keywords/hellinger-distance","display_name":"Hellinger distance","score":0.7737371921539307},{"id":"https://openalex.org/keywords/nearest-neighbor-search","display_name":"Nearest neighbor search","score":0.7467784881591797},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7006406188011169},{"id":"https://openalex.org/keywords/latent-dirichlet-allocation","display_name":"Latent Dirichlet allocation","score":0.6875810623168945},{"id":"https://openalex.org/keywords/k-nearest-neighbors-algorithm","display_name":"k-nearest neighbors algorithm","score":0.4887380599975586},{"id":"https://openalex.org/keywords/simplex","display_name":"Simplex","score":0.4885898232460022},{"id":"https://openalex.org/keywords/euclidean-distance","display_name":"Euclidean distance","score":0.4585355222225189},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4317276179790497},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.42012137174606323},{"id":"https://openalex.org/keywords/topic-model","display_name":"Topic model","score":0.386782705783844},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3798954486846924},{"id":"https://openalex.org/keywords/hash-function","display_name":"Hash function","score":0.34703558683395386},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3415992259979248},{"id":"https://openalex.org/keywords/hash-table","display_name":"Hash table","score":0.21950381994247437},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.21488603949546814},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.10530087351799011}],"concepts":[{"id":"https://openalex.org/C74270461","wikidata":"https://www.wikidata.org/wiki/Q1625299","display_name":"Locality-sensitive hashing","level":4,"score":0.8334452509880066},{"id":"https://openalex.org/C153024298","wikidata":"https://www.wikidata.org/wiki/Q3030678","display_name":"Hellinger distance","level":2,"score":0.7737371921539307},{"id":"https://openalex.org/C116738811","wikidata":"https://www.wikidata.org/wiki/Q608751","display_name":"Nearest neighbor search","level":2,"score":0.7467784881591797},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7006406188011169},{"id":"https://openalex.org/C500882744","wikidata":"https://www.wikidata.org/wiki/Q269236","display_name":"Latent Dirichlet allocation","level":3,"score":0.6875810623168945},{"id":"https://openalex.org/C113238511","wikidata":"https://www.wikidata.org/wiki/Q1071612","display_name":"k-nearest neighbors algorithm","level":2,"score":0.4887380599975586},{"id":"https://openalex.org/C62438384","wikidata":"https://www.wikidata.org/wiki/Q331350","display_name":"Simplex","level":2,"score":0.4885898232460022},{"id":"https://openalex.org/C120174047","wikidata":"https://www.wikidata.org/wiki/Q847073","display_name":"Euclidean distance","level":2,"score":0.4585355222225189},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4317276179790497},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.42012137174606323},{"id":"https://openalex.org/C171686336","wikidata":"https://www.wikidata.org/wiki/Q3532085","display_name":"Topic model","level":2,"score":0.386782705783844},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3798954486846924},{"id":"https://openalex.org/C99138194","wikidata":"https://www.wikidata.org/wiki/Q183427","display_name":"Hash function","level":2,"score":0.34703558683395386},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3415992259979248},{"id":"https://openalex.org/C67388219","wikidata":"https://www.wikidata.org/wiki/Q207440","display_name":"Hash table","level":3,"score":0.21950381994247437},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.21488603949546814},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.10530087351799011},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/2499178.2499189","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2499178.2499189","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2013 Conference on the Theory of Information Retrieval","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.366.8795","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.366.8795","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://maroo.cs.umass.edu/pub/web/getpdf.php?id=1101","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.387.7987","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.387.7987","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.ccs.neu.edu/home/dasmith/krstovski-ictir-2013.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7200000286102295,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":43,"referenced_works":["https://openalex.org/W193734270","https://openalex.org/W1491105865","https://openalex.org/W1508001288","https://openalex.org/W1552863221","https://openalex.org/W1877442813","https://openalex.org/W1880262756","https://openalex.org/W2012833704","https://openalex.org/W2024668293","https://openalex.org/W2024722304","https://openalex.org/W2033593667","https://openalex.org/W2042980227","https://openalex.org/W2068098316","https://openalex.org/W2071866949","https://openalex.org/W2075245318","https://openalex.org/W2078396654","https://openalex.org/W2078764670","https://openalex.org/W2099111195","https://openalex.org/W2103587173","https://openalex.org/W2107743791","https://openalex.org/W2115924763","https://openalex.org/W2122056984","https://openalex.org/W2129891468","https://openalex.org/W2140427797","https://openalex.org/W2146950091","https://openalex.org/W2146985866","https://openalex.org/W2147152072","https://openalex.org/W2147717514","https://openalex.org/W2152925220","https://openalex.org/W2155969596","https://openalex.org/W2159902206","https://openalex.org/W2163013692","https://openalex.org/W2165558283","https://openalex.org/W2165599843","https://openalex.org/W2166354010","https://openalex.org/W2251764313","https://openalex.org/W2278264611","https://openalex.org/W2397770138","https://openalex.org/W2616657226","https://openalex.org/W3007702476","https://openalex.org/W3099640513","https://openalex.org/W4231657491","https://openalex.org/W4233135949","https://openalex.org/W6785094883"],"related_works":["https://openalex.org/W2769501189","https://openalex.org/W4315588616","https://openalex.org/W4312773271","https://openalex.org/W2888805565","https://openalex.org/W2962686197","https://openalex.org/W3005513013","https://openalex.org/W2207653751","https://openalex.org/W2611137333","https://openalex.org/W4389543811","https://openalex.org/W2146350580"],"abstract_inverted_index":{"Document":[0],"similarity":[1],"tasks":[2,119,144],"arise":[3],"in":[4,99,104],"many":[5],"areas":[6],"of":[7,54,70,73,114,132,173],"information":[8],"retrieval":[9,138],"and":[10,37,68,96,112,136,145,154,187],"natural":[11],"language":[12],"processing.":[13],"A":[14],"fundamental":[15],"question":[16],"when":[17,161],"comparing":[18,48],"documents":[19,40,175],"is":[20,176,184],"which":[21,27],"representation":[22],"to":[23,76,84,177],"use.":[24],"Topic":[25],"models,":[26],"have":[28],"served":[29],"as":[30,41,58,92],"versatile":[31],"tools":[32],"for":[33,102,139],"exploratory":[34],"data":[35,148],"analysis":[36,67],"visualization,":[38],"represent":[39],"probability":[42,55,106],"distributions":[43,50],"over":[44],"latent":[45,121],"topics.":[46],"Systems":[47],"topic":[49],"thus":[51],"use":[52],"measures":[53],"divergence":[56,75],"such":[57,91],"Kullback-Leibler,":[59],"Jensen-Shannon,":[60],"or":[61],"Hellinger.":[62],"This":[63,80],"paper":[64],"presents":[65],"novel":[66],"applications":[69],"the":[71,105,110,180],"reduction":[72,81],"Hellinger":[74],"Euclidean":[77,152],"distance":[78],"computations.":[79],"allows":[82],"us":[83],"exploit":[85],"fast":[86],"approximate":[87,97,155],"nearest-neighbor":[88],"(NN)":[89],"techniques,":[90],"locality-sensitive":[93],"hashing":[94],"(LSH)":[95],"search":[98,103,158],"k-d":[100,156,181],"trees,":[101],"simplex.":[107],"We":[108],"demonstrate":[109],"effectiveness":[111],"efficiency":[113],"this":[115],"approach":[116,183],"on":[117,142,146],"two":[118],"using":[120],"Dirichlet":[122],"allocation":[123],"(LDA)":[124],"document":[125],"representations:":[126],"discovering":[127],"relationships":[128],"between":[129],"National":[130],"Institutes":[131],"Health":[133],"(NIH)":[134],"grants":[135],"prior-art":[137],"patents.":[140],"Evaluation":[141],"these":[143],"synthetic":[147],"shows":[149],"that":[150],"both":[151],"LSH":[153],"tree":[157,182],"perform":[159],"well":[160],"a":[162,170],"single":[163],"nearest":[164],"neighbor":[165],"must":[166],"be":[167,178],"found.":[168],"When":[169],"larger":[171],"set":[172],"similar":[174],"retrieved,":[179],"more":[185],"effective":[186],"efficient.":[188]},"counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":2},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":2},{"year":2016,"cited_by_count":2},{"year":2015,"cited_by_count":2},{"year":2014,"cited_by_count":2}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
