{"id":"https://openalex.org/W3168315715","doi":"https://doi.org/10.1145/3447548.3467435","title":"Needle in a Haystack","display_name":"Needle in a Haystack","publication_year":2021,"publication_date":"2021-08-13","ids":{"openalex":"https://openalex.org/W3168315715","doi":"https://doi.org/10.1145/3447548.3467435","mag":"3168315715"},"language":"en","primary_location":{"id":"doi:10.1145/3447548.3467435","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3447548.3467435","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 27th ACM SIGKDD Conference on Knowledge Discovery &amp; Data Mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5080946416","display_name":"Neil G. Marchant","orcid":"https://orcid.org/0000-0001-5713-4235"},"institutions":[{"id":"https://openalex.org/I165779595","display_name":"University of Melbourne","ror":"https://ror.org/01ej9dk98","country_code":"AU","type":"education","lineage":["https://openalex.org/I165779595"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Neil G. Marchant","raw_affiliation_strings":["University of Melbourne, Melbourne, Australia"],"affiliations":[{"raw_affiliation_string":"University of Melbourne, Melbourne, Australia","institution_ids":["https://openalex.org/I165779595"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5078824132","display_name":"Benjamin I. P. Rubinstein","orcid":"https://orcid.org/0000-0002-2947-6980"},"institutions":[{"id":"https://openalex.org/I165779595","display_name":"University of Melbourne","ror":"https://ror.org/01ej9dk98","country_code":"AU","type":"education","lineage":["https://openalex.org/I165779595"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Benjamin I. P. Rubinstein","raw_affiliation_strings":["University of Melbourne, Melbourne, Australia"],"affiliations":[{"raw_affiliation_string":"University of Melbourne, Melbourne, Australia","institution_ids":["https://openalex.org/I165779595"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5080946416"],"corresponding_institution_ids":["https://openalex.org/I165779595"],"apc_list":null,"apc_paid":null,"fwci":0.3486,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.61526679,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1180","last_page":"1190"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11819","display_name":"Data-Driven Disease Surveillance","score":0.9919000267982483,"subfield":{"id":"https://openalex.org/subfields/2713","display_name":"Epidemiology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/haystack","display_name":"Haystack","score":0.8670022487640381},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6652417182922363},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.6516462564468384},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.5884688496589661},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.4592449963092804},{"id":"https://openalex.org/keywords/dirichlet-distribution","display_name":"Dirichlet distribution","score":0.44805383682250977},{"id":"https://openalex.org/keywords/quantile","display_name":"Quantile","score":0.43584805727005005},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.41943010687828064},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.41254955530166626},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4117977023124695},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.4110739231109619},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.36123889684677124},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3002552390098572},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2476404309272766},{"id":"https://openalex.org/keywords/econometrics","display_name":"Econometrics","score":0.22508424520492554}],"concepts":[{"id":"https://openalex.org/C13424479","wikidata":"https://www.wikidata.org/wiki/Q5687237","display_name":"Haystack","level":2,"score":0.8670022487640381},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6652417182922363},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.6516462564468384},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.5884688496589661},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.4592449963092804},{"id":"https://openalex.org/C169214877","wikidata":"https://www.wikidata.org/wiki/Q981016","display_name":"Dirichlet distribution","level":3,"score":0.44805383682250977},{"id":"https://openalex.org/C118671147","wikidata":"https://www.wikidata.org/wiki/Q578714","display_name":"Quantile","level":2,"score":0.43584805727005005},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.41943010687828064},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.41254955530166626},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4117977023124695},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.4110739231109619},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.36123889684677124},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3002552390098572},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2476404309272766},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.22508424520492554},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.0},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C182310444","wikidata":"https://www.wikidata.org/wiki/Q1332643","display_name":"Boundary value problem","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3447548.3467435","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3447548.3467435","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 27th ACM SIGKDD Conference on Knowledge Discovery &amp; Data Mining","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/1","score":0.6800000071525574,"display_name":"No poverty"}],"awards":[{"id":"https://openalex.org/G3841472616","display_name":null,"funder_award_id":"DP150103710","funder_id":"https://openalex.org/F4320334704","funder_display_name":"Australian Research Council"}],"funders":[{"id":"https://openalex.org/F4320334704","display_name":"Australian Research Council","ror":"https://ror.org/05mmh0f86"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W1665662210","https://openalex.org/W1981590391","https://openalex.org/W2012035409","https://openalex.org/W2018647810","https://openalex.org/W2018752234","https://openalex.org/W2022348393","https://openalex.org/W2022637125","https://openalex.org/W2045685519","https://openalex.org/W2057495142","https://openalex.org/W2147632348","https://openalex.org/W2165558283","https://openalex.org/W2292361954","https://openalex.org/W2295598076","https://openalex.org/W2342632098","https://openalex.org/W2399495295","https://openalex.org/W2476423474","https://openalex.org/W2592734881","https://openalex.org/W2735102987","https://openalex.org/W2751013112","https://openalex.org/W2921905113","https://openalex.org/W2970923431","https://openalex.org/W2999905431","https://openalex.org/W3039200386","https://openalex.org/W3087402115","https://openalex.org/W3102476541","https://openalex.org/W3103126047","https://openalex.org/W3103616461","https://openalex.org/W3103991399","https://openalex.org/W4256250826","https://openalex.org/W4299432846"],"related_works":["https://openalex.org/W4253878822","https://openalex.org/W3030221677","https://openalex.org/W1965563707","https://openalex.org/W4210692028","https://openalex.org/W1736550718","https://openalex.org/W2808729870","https://openalex.org/W2479343091","https://openalex.org/W2278064783","https://openalex.org/W3174858427","https://openalex.org/W1972480475"],"abstract_inverted_index":{"Important":[0],"tasks":[1],"like":[2],"record":[3],"linkage":[4],"and":[5,99,122,132],"extreme":[6,9],"classification":[7],"demonstrate":[8,144],"class":[10],"imbalance,":[11],"with":[12,71,136],"1":[13,18],"minority":[14],"instance":[15],"to":[16,33,78,110,114,149],"every":[17],"million":[19],"or":[20,47,74],"more":[21],"majority":[22],"instances.":[23],"Obtaining":[24],"a":[25,84,95,106,123],"sufficient":[26],"sample":[27],"of":[28],"all":[29],"classes,":[30],"even":[31],"just":[32],"achieve":[34],"statistically-significant":[35],"evaluation,":[36],"is":[37],"so":[38],"challenging":[39],"that":[40,139],"most":[41],"current":[42],"approaches":[43],"yield":[44],"poor":[45],"estimates":[46,67],"incur":[48],"impractical":[49],"cost.":[50],"Where":[51],"importance":[52,92],"sampling":[53],"has":[54],"been":[55],"levied":[56],"against":[57],"this":[58],"challenge,":[59],"restrictive":[60],"constraints":[61],"are":[62],"placed":[63],"on":[64,90,151],"performance":[65,97,130],"metrics,":[66],"do":[68],"not":[69],"come":[70],"appropriate":[72],"guarantees,":[73],"evaluations":[75],"cannot":[76],"adapt":[77],"incoming":[79],"labels.":[80],"This":[81],"paper":[82],"develops":[83],"framework":[85,104,135],"for":[86,101,127],"online":[87],"evaluation":[88],"based":[89],"adaptive":[91],"sampling.":[93],"Given":[94],"target":[96],"metric":[98],"model":[100],"p(y|x),":[102],"the":[103,128],"adapts":[105],"distribution":[107],"over":[108],"items":[109],"label":[111,153],"in":[112],"order":[113],"maximize":[115],"statistical":[116],"precision.":[117],"We":[118],"establish":[119],"strong":[120],"consistency":[121],"central":[124],"limit":[125],"theorem":[126],"resulting":[129],"estimates,":[131],"instantiate":[133],"our":[134],"worked":[137],"examples":[138],"leverage":[140],"Dirichlet-tree":[141],"models.":[142],"Experiments":[143],"an":[145],"average":[146],"MSE":[147],"superior":[148],"state-of-the-art":[150],"fixed":[152],"budgets.":[154]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
