{"id":"https://openalex.org/W2808874433","doi":"https://doi.org/10.1145/3219819.3219901","title":"Anatomy of a Privacy-Safe Large-Scale Information Extraction System Over Email","display_name":"Anatomy of a Privacy-Safe Large-Scale Information Extraction System Over Email","publication_year":2018,"publication_date":"2018-07-19","ids":{"openalex":"https://openalex.org/W2808874433","doi":"https://doi.org/10.1145/3219819.3219901","mag":"2808874433"},"language":"en","primary_location":{"id":"doi:10.1145/3219819.3219901","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3219819.3219901","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3219819.3219901","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery &amp; Data Mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3219819.3219901","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102900861","display_name":"Ying Sheng","orcid":"https://orcid.org/0000-0003-4561-2097"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ying Sheng","raw_affiliation_strings":["Google, Mountain View, CA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Google, Mountain View, CA, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102945075","display_name":"Sandeep Tata","orcid":"https://orcid.org/0009-0007-7785-5516"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sandeep Tata","raw_affiliation_strings":["Google, Mountain View, CA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Google, Mountain View, CA, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060825780","display_name":"James B. Wendt","orcid":"https://orcid.org/0000-0003-4980-5152"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"James B. Wendt","raw_affiliation_strings":["Google, Mountain View, CA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Google, Mountain View, CA, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078134635","display_name":"Jing Xie","orcid":"https://orcid.org/0000-0003-0751-0380"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jing Xie","raw_affiliation_strings":["Google, Mountain View, CA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Google, Mountain View, CA, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101361531","display_name":"Qi Zhao","orcid":"https://orcid.org/0000-0001-6750-966X"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Qi Zhao","raw_affiliation_strings":["Google, Mountain View, CA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Google, Mountain View, CA, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5037200145","display_name":"Marc Najork","orcid":"https://orcid.org/0000-0003-1423-0854"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Marc Najork","raw_affiliation_strings":["Google, Mountain View, CA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Google, Mountain View, CA, USA","institution_ids":["https://openalex.org/I1291425158"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":4.425,"has_fulltext":true,"cited_by_count":18,"citation_normalized_percentile":{"value":0.95433751,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"734","last_page":"743"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12607","display_name":"Personal Information Management and User Behavior","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T12607","display_name":"Personal Information Management and User Behavior","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11704","display_name":"Mobile Crowdsensing and Crowdsourcing","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8262511491775513},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5938031673431396},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.5828084945678711},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.541493833065033},{"id":"https://openalex.org/keywords/payment","display_name":"Payment","score":0.4987509250640869},{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.483661025762558},{"id":"https://openalex.org/keywords/service","display_name":"Service (business)","score":0.47858095169067383},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.4327211081981659},{"id":"https://openalex.org/keywords/nosql","display_name":"NoSQL","score":0.4262217879295349},{"id":"https://openalex.org/keywords/safeguarding","display_name":"Safeguarding","score":0.4205491542816162},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.380593478679657},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.3444977402687073},{"id":"https://openalex.org/keywords/internet-privacy","display_name":"Internet privacy","score":0.34138625860214233},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.316422700881958},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.26092246174812317},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.15091818571090698}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8262511491775513},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5938031673431396},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.5828084945678711},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.541493833065033},{"id":"https://openalex.org/C145097563","wikidata":"https://www.wikidata.org/wiki/Q1148747","display_name":"Payment","level":2,"score":0.4987509250640869},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.483661025762558},{"id":"https://openalex.org/C2780378061","wikidata":"https://www.wikidata.org/wiki/Q25351891","display_name":"Service (business)","level":2,"score":0.47858095169067383},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.4327211081981659},{"id":"https://openalex.org/C2779599972","wikidata":"https://www.wikidata.org/wiki/Q82231","display_name":"NoSQL","level":3,"score":0.4262217879295349},{"id":"https://openalex.org/C2776743756","wikidata":"https://www.wikidata.org/wiki/Q5097921","display_name":"Safeguarding","level":2,"score":0.4205491542816162},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.380593478679657},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.3444977402687073},{"id":"https://openalex.org/C108827166","wikidata":"https://www.wikidata.org/wiki/Q175975","display_name":"Internet privacy","level":1,"score":0.34138625860214233},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.316422700881958},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.26092246174812317},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.15091818571090698},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C136264566","wikidata":"https://www.wikidata.org/wiki/Q159810","display_name":"Economy","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C159110408","wikidata":"https://www.wikidata.org/wiki/Q121176","display_name":"Nursing","level":1,"score":0.0},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3219819.3219901","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3219819.3219901","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3219819.3219901","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery &amp; Data Mining","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3219819.3219901","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3219819.3219901","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3219819.3219901","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery &amp; Data Mining","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2808874433.pdf","grobid_xml":"https://content.openalex.org/works/W2808874433.grobid-xml"},"referenced_works_count":51,"referenced_works":["https://openalex.org/W1493490255","https://openalex.org/W1553019137","https://openalex.org/W1568339100","https://openalex.org/W1593505700","https://openalex.org/W1934084512","https://openalex.org/W1974922810","https://openalex.org/W1980176056","https://openalex.org/W1999595522","https://openalex.org/W2007652426","https://openalex.org/W2052742498","https://openalex.org/W2115461474","https://openalex.org/W2123958887","https://openalex.org/W2132679783","https://openalex.org/W2133227149","https://openalex.org/W2134479759","https://openalex.org/W2134510195","https://openalex.org/W2136114025","https://openalex.org/W2145772003","https://openalex.org/W2146502635","https://openalex.org/W2148440006","https://openalex.org/W2150721933","https://openalex.org/W2152565070","https://openalex.org/W2158188757","https://openalex.org/W2159024459","https://openalex.org/W2162340487","https://openalex.org/W2164301055","https://openalex.org/W2255862008","https://openalex.org/W2278392294","https://openalex.org/W2295816791","https://openalex.org/W2404161646","https://openalex.org/W2470673105","https://openalex.org/W2471366537","https://openalex.org/W2474838075","https://openalex.org/W2536339198","https://openalex.org/W2584049620","https://openalex.org/W2598202925","https://openalex.org/W2616357320","https://openalex.org/W2616462372","https://openalex.org/W2732547613","https://openalex.org/W2753278460","https://openalex.org/W2753688405","https://openalex.org/W2784739162","https://openalex.org/W2798894547","https://openalex.org/W2901597048","https://openalex.org/W2950577311","https://openalex.org/W2951345965","https://openalex.org/W2953132584","https://openalex.org/W2998508934","https://openalex.org/W3012264151","https://openalex.org/W3019237274","https://openalex.org/W3105187107"],"related_works":["https://openalex.org/W4387497383","https://openalex.org/W3183948672","https://openalex.org/W3173606202","https://openalex.org/W3110381201","https://openalex.org/W2948807893","https://openalex.org/W2935909890","https://openalex.org/W2778153218","https://openalex.org/W2758277628","https://openalex.org/W1531601525","https://openalex.org/W2374550342"],"abstract_inverted_index":{"Extracting":[0],"structured":[1],"data":[2],"from":[3,54,180],"emails":[4],"can":[5],"enable":[6],"several":[7,170],"assistive":[8],"experiences,":[9],"such":[10],"as":[11],"reminding":[12],"the":[13,24,39,68,71,86,94,98,105,119,127,131,156,159],"user":[14,40],"when":[15],"a":[16,28,49,60,81,90],"bill":[17],"payment":[18],"is":[19,41,57],"due,":[20],"answering":[21],"queries":[22],"about":[23],"departure":[25],"time":[26],"of":[27,70,100,141,158,172],"booked":[29],"flight,":[30],"or":[31],"proactively":[32],"surfacing":[33],"an":[34],"emailed":[35],"discount":[36],"coupon":[37],"while":[38],"at":[42],"that":[43,56],"store.":[44],"This":[45],"paper":[46],"presents":[47],"Juicer,":[48],"system":[50,72],"for":[51,93],"extracting":[52],"information":[53,178],"email":[55,83],"serving":[58],"over":[59],"billion":[61],"Gmail":[62],"users":[63,101],"daily.":[64],"We":[65,117,137],"describe":[66,118],"how":[67],"design":[69,120],"was":[73],"informed":[74],"by":[75],"three":[76,142],"key":[77],"principles:":[78],"scaling":[79],"to":[80,88,112,134,164],"planet-wide":[82],"service,":[84],"isolating":[85],"complexity":[87],"provide":[89],"simple":[91],"experience":[92],"developer,":[95],"and":[96,104,130,152],"safeguarding":[97],"privacy":[99],"(our":[102],"team":[103],"developers":[106],"we":[107,168],"support":[108],"are":[109],"not":[110],"allowed":[111],"view":[113],"any":[114],"single":[115],"email).":[116],"tradeoffs":[121],"made":[122],"in":[123,175],"building":[124],"this":[125,147],"system,":[126],"challenges":[128,162],"faced":[129],"approaches":[132],"used":[133],"tackle":[135],"them.":[136],"present":[138],"case":[139],"studies":[140],"extraction":[143,179],"tasks":[144],"implemented":[145],"on":[146],"platform---bill":[148],"reminders,":[149],"commercial":[150],"offers,":[151],"hotel":[153],"reservations---to":[154],"illustrate":[155],"effectiveness":[157],"platform":[160],"despite":[161],"unique":[163],"each":[165],"task.":[166],"Finally,":[167],"outline":[169],"areas":[171],"ongoing":[173],"research":[174],"large-scale":[176],"machine-learned":[177],"email.":[181]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":2},{"year":2022,"cited_by_count":6},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":3},{"year":2018,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
