{"id":"https://openalex.org/W2585200994","doi":"https://doi.org/10.1145/3018661.3018727","title":"Partitioning and Segment Organization Strategies for Real-Time Selective Search on Document Streams","display_name":"Partitioning and Segment Organization Strategies for Real-Time Selective Search on Document Streams","publication_year":2017,"publication_date":"2017-02-02","ids":{"openalex":"https://openalex.org/W2585200994","doi":"https://doi.org/10.1145/3018661.3018727","mag":"2585200994"},"language":"en","primary_location":{"id":"doi:10.1145/3018661.3018727","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3018661.3018727","pdf_url":"http://dl.acm.org/ft_gateway.cfm?id=3018727&type=pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Tenth ACM International Conference on Web Search and Data Mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"http://dl.acm.org/ft_gateway.cfm?id=3018727&type=pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100682371","display_name":"Yulu Wang","orcid":"https://orcid.org/0009-0004-5100-7816"},"institutions":[{"id":"https://openalex.org/I66946132","display_name":"University of Maryland, College Park","ror":"https://ror.org/047s2c258","country_code":"US","type":"education","lineage":["https://openalex.org/I66946132"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Yulu Wang","raw_affiliation_strings":["University of Maryland, College Park, MD, USA"],"affiliations":[{"raw_affiliation_string":"University of Maryland, College Park, MD, USA","institution_ids":["https://openalex.org/I66946132"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5082997975","display_name":"Jimmy Lin","orcid":"https://orcid.org/0000-0002-0661-7189"},"institutions":[{"id":"https://openalex.org/I151746483","display_name":"University of Waterloo","ror":"https://ror.org/01aff2v68","country_code":"CA","type":"education","lineage":["https://openalex.org/I151746483"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Jimmy Lin","raw_affiliation_strings":["University of Waterloo, Waterloo, ON, Canada"],"affiliations":[{"raw_affiliation_string":"University of Waterloo, Waterloo, ON, Canada","institution_ids":["https://openalex.org/I151746483"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5100682371"],"corresponding_institution_ids":["https://openalex.org/I66946132"],"apc_list":null,"apc_paid":null,"fwci":0.4385,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.65374825,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"221","last_page":"230"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10742","display_name":"Peer-to-Peer Network Technologies","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8329755067825317},{"id":"https://openalex.org/keywords/partition","display_name":"Partition (number theory)","score":0.7186322212219238},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.670316219329834},{"id":"https://openalex.org/keywords/curse-of-dimensionality","display_name":"Curse of dimensionality","score":0.585780680179596},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5541995167732239},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4876176416873932},{"id":"https://openalex.org/keywords/search-engine","display_name":"Search engine","score":0.4281180500984192},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.23025226593017578}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8329755067825317},{"id":"https://openalex.org/C42812","wikidata":"https://www.wikidata.org/wiki/Q1082910","display_name":"Partition (number theory)","level":2,"score":0.7186322212219238},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.670316219329834},{"id":"https://openalex.org/C111030470","wikidata":"https://www.wikidata.org/wiki/Q1430460","display_name":"Curse of dimensionality","level":2,"score":0.585780680179596},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5541995167732239},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4876176416873932},{"id":"https://openalex.org/C97854310","wikidata":"https://www.wikidata.org/wiki/Q19541","display_name":"Search engine","level":2,"score":0.4281180500984192},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.23025226593017578},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3018661.3018727","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3018661.3018727","pdf_url":"http://dl.acm.org/ft_gateway.cfm?id=3018727&type=pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Tenth ACM International Conference on Web Search and Data Mining","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3018661.3018727","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3018661.3018727","pdf_url":"http://dl.acm.org/ft_gateway.cfm?id=3018727&type=pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Tenth ACM International Conference on Web Search and Data Mining","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1961012327","display_name":null,"funder_award_id":"1405688","funder_id":"https://openalex.org/F4320334593","funder_display_name":"Natural Sciences and Engineering Research Council of Canada"},{"id":"https://openalex.org/G2165548363","display_name":null,"funder_award_id":"Canada","funder_id":"https://openalex.org/F4320334593","funder_display_name":"Natural Sciences and Engineering Research Council of Canada"},{"id":"https://openalex.org/G2830599200","display_name":"II-EN: Hadoop NextGen Infrastructure for Heterogeneous Approaches to Data-Intensive Computing","funder_award_id":"1405688","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G4394039926","display_name":null,"funder_award_id":"1218043","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G6609948673","display_name":null,"funder_award_id":"IIS-1218043","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G6894796833","display_name":null,"funder_award_id":"IIS-1218043 and CNS-1405688","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G6945253973","display_name":null,"funder_award_id":"CNS-1405688","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G8284766523","display_name":null,"funder_award_id":"(NSERC)","funder_id":"https://openalex.org/F4320334593","funder_display_name":"Natural Sciences and Engineering Research Council of Canada"},{"id":"https://openalex.org/G848032724","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320334593","display_name":"Natural Sciences and Engineering Research Council of Canada","ror":"https://ror.org/01h531d29"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2585200994.pdf","grobid_xml":"https://content.openalex.org/works/W2585200994.grobid-xml"},"referenced_works_count":54,"referenced_works":["https://openalex.org/W2562782","https://openalex.org/W17129901","https://openalex.org/W410850256","https://openalex.org/W1800296434","https://openalex.org/W1967187838","https://openalex.org/W1982063824","https://openalex.org/W1984918470","https://openalex.org/W1990388042","https://openalex.org/W1994915827","https://openalex.org/W2002682102","https://openalex.org/W2022578651","https://openalex.org/W2036295879","https://openalex.org/W2039678943","https://openalex.org/W2041565863","https://openalex.org/W2048442462","https://openalex.org/W2071080574","https://openalex.org/W2073459066","https://openalex.org/W2074449313","https://openalex.org/W2088340225","https://openalex.org/W2091379987","https://openalex.org/W2098034778","https://openalex.org/W2098294664","https://openalex.org/W2099111758","https://openalex.org/W2104588805","https://openalex.org/W2106059686","https://openalex.org/W2108278040","https://openalex.org/W2108399535","https://openalex.org/W2112492518","https://openalex.org/W2114804204","https://openalex.org/W2115939989","https://openalex.org/W2121928206","https://openalex.org/W2130395434","https://openalex.org/W2141257014","https://openalex.org/W2141599568","https://openalex.org/W2145898068","https://openalex.org/W2146027514","https://openalex.org/W2147671191","https://openalex.org/W2152707065","https://openalex.org/W2156499390","https://openalex.org/W2157008678","https://openalex.org/W2171144050","https://openalex.org/W2250539671","https://openalex.org/W2340309946","https://openalex.org/W2342707026","https://openalex.org/W2399079233","https://openalex.org/W2467890656","https://openalex.org/W4214835284","https://openalex.org/W4242151277","https://openalex.org/W4244622952","https://openalex.org/W4244975643","https://openalex.org/W6638299864","https://openalex.org/W6683136438","https://openalex.org/W6703967046","https://openalex.org/W6712451647"],"related_works":["https://openalex.org/W4296209631","https://openalex.org/W3097449145","https://openalex.org/W2561617217","https://openalex.org/W2355801475","https://openalex.org/W4206659427","https://openalex.org/W2170062176","https://openalex.org/W4298130764","https://openalex.org/W2148135840","https://openalex.org/W106004901","https://openalex.org/W2973622563"],"abstract_inverted_index":{"The":[0],"basic":[1],"idea":[2],"behind":[3],"selective":[4,82,105],"search":[5,83,106,164],"is":[6,41,64,107],"to":[7,28,43,66,136,158],"partition":[8],"a":[9,20,50],"collection":[10],"into":[11,101],"topical":[12],"clusters,":[13],"and":[14,104,123,185],"for":[15,71],"each":[16,110],"query,":[17],"consider":[18,77],"only":[19,49,167],"subset":[21],"of":[22,53,81,133,140,170],"the":[23,54,78,138,141,149,171],"clusters":[24],"that":[25,39,154,201],"are":[26,99,156],"likely":[27],"contain":[29],"relevant":[30],"documents.":[31],"Previous":[32],"work":[33],"on":[34,84],"web":[35],"collections":[36,61,147],"has":[37],"shown":[38],"it":[40,63],"possible":[42],"retain":[44],"high-quality":[45],"results":[46],"while":[47,165],"considering":[48,166],"small":[51],"fraction":[52],"collection.":[55,172],"These":[56],"studies,":[57],"however,":[58],"assume":[59],"static":[60],"where":[62,89],"feasible":[65],"run":[67],"batch":[68,119,181],"clustering":[69,184],"algorithms":[70],"partitioning.":[72],"In":[73,95],"this":[74],"work,":[75],"we":[76,130,155,174],"novel":[79],"formulation":[80],"document":[85,142],"streams":[86],"(specifically,":[87],"tweets),":[88],"partitioning":[90],"must":[91],"be":[92,116,205],"performed":[93,108],"incrementally.":[94],"our":[96],"approach,":[97],"documents":[98],"partitioned":[100],"temporal":[102,126,190],"segments":[103,113],"within":[109],"segment:":[111],"these":[112],"can":[114],"either":[115],"clustered":[117],"using":[118],"or":[120],"online":[121,183],"algorithms,":[122],"at":[124],"different":[125,196],"granularities.":[127],"For":[128],"efficiency,":[129],"take":[131],"advantage":[132],"word":[134],"embeddings":[135],"reduce":[137],"dimensionality":[139],"vectors.":[143],"Experiments":[144],"with":[145],"test":[146],"from":[148,162],"TREC":[150],"Microblog":[151],"Tracks":[152],"show":[153],"able":[157],"achieve":[159],"precision":[160],"indistinguishable":[161],"exhaustive":[163],"around":[168],"5%":[169],"Interestingly,":[173],"observe":[175],"no":[176],"significant":[177],"effectiveness":[178],"differences":[179],"between":[180,186],"vs.":[182,188],"hourly":[187],"daily":[189],"segments,":[191],"despite":[192],"them":[193],"being":[194],"very":[195],"index":[197],"organizations.":[198],"This":[199],"suggests":[200],"architectural":[202],"choices":[203],"should":[204],"primarily":[206],"guided":[207],"by":[208],"efficiency":[209],"considerations.":[210]},"counts_by_year":[{"year":2019,"cited_by_count":2}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
