{"id":"https://openalex.org/W3116329819","doi":"https://doi.org/10.1145/3437963.3441793","title":"\u03b2-Cores: Robust Large-Scale Bayesian Data Summarization in the Presence of Outliers","display_name":"\u03b2-Cores: Robust Large-Scale Bayesian Data Summarization in the Presence of Outliers","publication_year":2021,"publication_date":"2021-03-06","ids":{"openalex":"https://openalex.org/W3116329819","doi":"https://doi.org/10.1145/3437963.3441793","mag":"3116329819"},"language":"en","primary_location":{"id":"doi:10.1145/3437963.3441793","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3437963.3441793","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3437963.3441793","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 14th ACM International Conference on Web Search and Data Mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3437963.3441793","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5084575988","display_name":"Dionysis Manousakas","orcid":null},"institutions":[{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Dionysis Manousakas","raw_affiliation_strings":["University of Cambridge, Cambridge, United Kingdom"],"affiliations":[{"raw_affiliation_string":"University of Cambridge, Cambridge, United Kingdom","institution_ids":["https://openalex.org/I241749"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5010623957","display_name":"Cecilia Mascolo","orcid":"https://orcid.org/0000-0001-9614-4380"},"institutions":[{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Cecilia Mascolo","raw_affiliation_strings":["University of Cambridge, Cambridge, United Kingdom"],"affiliations":[{"raw_affiliation_string":"University of Cambridge, Cambridge, United Kingdom","institution_ids":["https://openalex.org/I241749"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5084575988"],"corresponding_institution_ids":["https://openalex.org/I241749"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0118827,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"940","last_page":"948"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12814","display_name":"Gaussian Processes and Bayesian Inference","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12814","display_name":"Gaussian Processes and Bayesian Inference","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/automatic-summarization","display_name":"Automatic summarization","score":0.8691765069961548},{"id":"https://openalex.org/keywords/outlier","display_name":"Outlier","score":0.739368736743927},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6712683439254761},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.5477388501167297},{"id":"https://openalex.org/keywords/bayesian-probability","display_name":"Bayesian probability","score":0.5443255305290222},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.45064929127693176},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.38259458541870117},{"id":"https://openalex.org/keywords/cartography","display_name":"Cartography","score":0.05153173208236694}],"concepts":[{"id":"https://openalex.org/C170858558","wikidata":"https://www.wikidata.org/wiki/Q1394144","display_name":"Automatic summarization","level":2,"score":0.8691765069961548},{"id":"https://openalex.org/C79337645","wikidata":"https://www.wikidata.org/wiki/Q779824","display_name":"Outlier","level":2,"score":0.739368736743927},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6712683439254761},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.5477388501167297},{"id":"https://openalex.org/C107673813","wikidata":"https://www.wikidata.org/wiki/Q812534","display_name":"Bayesian probability","level":2,"score":0.5443255305290222},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.45064929127693176},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.38259458541870117},{"id":"https://openalex.org/C58640448","wikidata":"https://www.wikidata.org/wiki/Q42515","display_name":"Cartography","level":1,"score":0.05153173208236694},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.1145/3437963.3441793","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3437963.3441793","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3437963.3441793","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 14th ACM International Conference on Web Search and Data Mining","raw_type":"proceedings-article"},{"id":"pmh:oai:generic.eprints.org:1226779","is_oa":false,"landing_page_url":"http://publications.eng.cam.ac.uk/1226779/","pdf_url":null,"source":{"id":"https://openalex.org/S4406922847","display_name":"Cambridge University Engineering Department Publications Database","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Article"},{"id":"pmh:oai:generic.eprints.org:1239048","is_oa":false,"landing_page_url":"http://publications.eng.cam.ac.uk/1239048/","pdf_url":null,"source":{"id":"https://openalex.org/S4406922847","display_name":"Cambridge University Engineering Department Publications Database","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Conference or Workshop Item"},{"id":"pmh:oai:www.repository.cam.ac.uk:1810/311750","is_oa":false,"landing_page_url":"https://www.repository.cam.ac.uk/handle/1810/311750","pdf_url":null,"source":{"id":"https://openalex.org/S4306401777","display_name":"Apollo (University of Cambridge)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I241749","host_organization_name":"University of Cambridge","host_organization_lineage":["https://openalex.org/I241749"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Webpages"},{"id":"doi:10.17863/cam.58840","is_oa":true,"landing_page_url":"https://doi.org/10.17863/cam.58840","pdf_url":null,"source":{"id":"https://openalex.org/S7407050737","display_name":"Apollo","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.1145/3437963.3441793","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3437963.3441793","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3437963.3441793","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 14th ACM International Conference on Web Search and Data Mining","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320334641","display_name":"Darwin College, University of Cambridge","ror":"https://ror.org/036wvdb36"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3116329819.pdf","grobid_xml":"https://content.openalex.org/works/W3116329819.grobid-xml"},"referenced_works_count":65,"referenced_works":["https://openalex.org/W178169250","https://openalex.org/W1559060276","https://openalex.org/W1574811474","https://openalex.org/W1798702550","https://openalex.org/W2014226385","https://openalex.org/W2035641621","https://openalex.org/W2040557557","https://openalex.org/W2046033161","https://openalex.org/W2054932610","https://openalex.org/W2068696370","https://openalex.org/W2095304671","https://openalex.org/W2110273389","https://openalex.org/W2117890631","https://openalex.org/W2120340025","https://openalex.org/W2123748412","https://openalex.org/W2125943921","https://openalex.org/W2129728285","https://openalex.org/W2140890285","https://openalex.org/W2146502635","https://openalex.org/W2150070703","https://openalex.org/W2150102617","https://openalex.org/W2152878561","https://openalex.org/W2153783671","https://openalex.org/W2165363188","https://openalex.org/W2166851633","https://openalex.org/W2167433878","https://openalex.org/W2167460663","https://openalex.org/W2404724429","https://openalex.org/W2509109313","https://openalex.org/W2559655401","https://openalex.org/W2559997609","https://openalex.org/W2597289420","https://openalex.org/W2604272474","https://openalex.org/W2607662938","https://openalex.org/W2612690371","https://openalex.org/W2622039412","https://openalex.org/W2765391880","https://openalex.org/W2767137871","https://openalex.org/W2785917027","https://openalex.org/W2790483052","https://openalex.org/W2804810839","https://openalex.org/W2913668833","https://openalex.org/W2939984132","https://openalex.org/W2948340529","https://openalex.org/W2963495973","https://openalex.org/W2963932270","https://openalex.org/W2964010828","https://openalex.org/W2964727689","https://openalex.org/W3011496861","https://openalex.org/W3036469904","https://openalex.org/W3101593554","https://openalex.org/W3120740533","https://openalex.org/W4205806204","https://openalex.org/W4212774754","https://openalex.org/W4300576158","https://openalex.org/W4394651571","https://openalex.org/W6678608707","https://openalex.org/W6679158198","https://openalex.org/W6679959949","https://openalex.org/W6684242912","https://openalex.org/W6687234023","https://openalex.org/W6722226382","https://openalex.org/W6739088070","https://openalex.org/W6785003484","https://openalex.org/W7048060829"],"related_works":["https://openalex.org/W2351187795","https://openalex.org/W2380641910","https://openalex.org/W52724171","https://openalex.org/W2589098947","https://openalex.org/W2285613413","https://openalex.org/W2561691764","https://openalex.org/W2604412476","https://openalex.org/W2811233515","https://openalex.org/W1896195941","https://openalex.org/W2156964798"],"abstract_inverted_index":{"Modern":[0],"machine":[1],"learning":[2],"applications":[3],"should":[4],"be":[5,164],"able":[6],"to":[7,23,64,85,95,151,204],"address":[8],"the":[9,26,43,56,90,96,101,108,118,125,142,177,210],"intrinsic":[10],"challenges":[11],"arising":[12],"over":[13],"inference":[14,48,75],"on":[15,51,124],"massive":[16],"real-world":[17],"datasets,":[18,87,187],"including":[19,192],"scalability":[20],"and":[21,40,88,145,171,185,188,197],"robustness":[22],"outliers.":[24,213],"Despite":[25],"multiple":[27],"benefits":[28],"of":[29,37,45,98,120,128,141,179,212],"Bayesian":[30,47,115,133,206],"methods":[31,208],"(such":[32],"as":[33,117],"uncertainty-aware":[34],"predictions,":[35],"incorporation":[36],"experts":[38],"knowledge,":[39],"hierarchical":[41],"modeling),":[42],"quality":[44],"classic":[46],"depends":[49],"critically":[50],"whether":[52],"observations":[53],"conform":[54],"with":[55,93],"assumed":[57],"data":[58,160,173],"generating":[59],"model,":[60],"which":[61],"is":[62],"impossible":[63],"guarantee":[65],"in":[66,78,100,167,182,209],"practice.":[67],"In":[68],"this":[69],"work,":[70],"we":[71,110,135],"propose":[72,136],"a":[73,79,112,137],"variational":[74,139],"method":[76,156],"that,":[77],"principled":[80],"way,":[81],"can":[82,163],"simultaneously":[83],"scale":[84],"large":[86],"robustify":[89],"inferred":[91],"posterior":[92,116,144],"respect":[94],"existence":[97],"outliers":[99],"observed":[102],"data.":[103],"Reformulating":[104],"Bayes":[105],"theorem":[106],"via":[107],"\u03b2-divergence,":[109],"posit":[111],"robustified":[113,143],"generalized":[114],"target":[119],"inference.":[121],"Moreover,":[122],"relying":[123],"recent":[126],"formulations":[127],"Riemannian":[129],"coresets":[130],"for":[131],"scalable":[132],"inference,":[134,195],"sparse":[138],"approximation":[140],"an":[146],"efficient":[147],"stochastic":[148],"black-box":[149],"algorithm":[150],"construct":[152],"it.":[153],"Overall":[154],"our":[155,180],"allows":[157],"releasing":[158],"cleansed":[159],"summaries":[161],"that":[162],"applied":[165],"broadly":[166],"scenarios":[168],"involving":[169],"structured":[170],"unstructured":[172],"contamination.":[174],"We":[175],"illustrate":[176],"applicability":[178],"approach":[181],"diverse":[183],"simulated":[184],"real":[186],"various":[189],"statistical":[190],"models,":[191],"Gaussian":[193],"mean":[194],"logistic":[196],"neural":[198],"linear":[199],"regression,":[200],"demonstrating":[201],"its":[202],"superiority":[203],"existing":[205],"summarization":[207],"presence":[211]},"counts_by_year":[],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
