{"id":"https://openalex.org/W3015309972","doi":"https://doi.org/10.1109/icassp40776.2020.9054158","title":"Addressing Challenges in Building Web-Scale Content Classification Systems","display_name":"Addressing Challenges in Building Web-Scale Content Classification Systems","publication_year":2020,"publication_date":"2020-04-09","ids":{"openalex":"https://openalex.org/W3015309972","doi":"https://doi.org/10.1109/icassp40776.2020.9054158","mag":"3015309972"},"language":"en","primary_location":{"id":"doi:10.1109/icassp40776.2020.9054158","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp40776.2020.9054158","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5038137229","display_name":"Aditya Srinivas Timmaraju","orcid":"https://orcid.org/0009-0001-5934-6079"},"institutions":[{"id":"https://openalex.org/I4210114444","display_name":"Meta (United States)","ror":"https://ror.org/01zbnvs85","country_code":"US","type":"company","lineage":["https://openalex.org/I4210114444"]},{"id":"https://openalex.org/I4210099336","display_name":"Menlo School","ror":"https://ror.org/01240pn49","country_code":"US","type":"education","lineage":["https://openalex.org/I4210099336"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Aditya Srinivas Timmaraju","raw_affiliation_strings":["Facebook Inc, Menlo Park, CA"],"affiliations":[{"raw_affiliation_string":"Facebook Inc, Menlo Park, CA","institution_ids":["https://openalex.org/I4210114444","https://openalex.org/I4210099336"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026647956","display_name":"Angli Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210099336","display_name":"Menlo School","ror":"https://ror.org/01240pn49","country_code":"US","type":"education","lineage":["https://openalex.org/I4210099336"]},{"id":"https://openalex.org/I4210114444","display_name":"Meta (United States)","ror":"https://ror.org/01zbnvs85","country_code":"US","type":"company","lineage":["https://openalex.org/I4210114444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Angli Liu","raw_affiliation_strings":["Facebook Inc, Menlo Park, CA"],"affiliations":[{"raw_affiliation_string":"Facebook Inc, Menlo Park, CA","institution_ids":["https://openalex.org/I4210114444","https://openalex.org/I4210099336"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5040307243","display_name":"Pushkar Tripathi","orcid":"https://orcid.org/0000-0001-5135-6136"},"institutions":[{"id":"https://openalex.org/I4210099336","display_name":"Menlo School","ror":"https://ror.org/01240pn49","country_code":"US","type":"education","lineage":["https://openalex.org/I4210099336"]},{"id":"https://openalex.org/I4210114444","display_name":"Meta (United States)","ror":"https://ror.org/01zbnvs85","country_code":"US","type":"company","lineage":["https://openalex.org/I4210114444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Pushkar Tripathi","raw_affiliation_strings":["Facebook Inc, Menlo Park, CA"],"affiliations":[{"raw_affiliation_string":"Facebook Inc, Menlo Park, CA","institution_ids":["https://openalex.org/I4210114444","https://openalex.org/I4210099336"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5038137229"],"corresponding_institution_ids":["https://openalex.org/I4210099336","https://openalex.org/I4210114444"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.03372972,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"abs 1910 3771","issue":null,"first_page":"8134","last_page":"8138"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7315006852149963},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.618821382522583},{"id":"https://openalex.org/keywords/the-internet","display_name":"The Internet","score":0.5592387914657593},{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.4884137511253357},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.47479528188705444},{"id":"https://openalex.org/keywords/taxonomy","display_name":"Taxonomy (biology)","score":0.4359772801399231},{"id":"https://openalex.org/keywords/web-content","display_name":"Web content","score":0.4313500225543976},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.4036853313446045},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3939388692378998},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.39003652334213257},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.38613733649253845},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.3403569459915161},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.32545483112335205}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7315006852149963},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.618821382522583},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.5592387914657593},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.4884137511253357},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.47479528188705444},{"id":"https://openalex.org/C58642233","wikidata":"https://www.wikidata.org/wiki/Q8269924","display_name":"Taxonomy (biology)","level":2,"score":0.4359772801399231},{"id":"https://openalex.org/C2776324614","wikidata":"https://www.wikidata.org/wiki/Q3948731","display_name":"Web content","level":3,"score":0.4313500225543976},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.4036853313446045},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3939388692378998},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.39003652334213257},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.38613733649253845},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.3403569459915161},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.32545483112335205},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp40776.2020.9054158","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp40776.2020.9054158","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6899999976158142,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":35,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1775434803","https://openalex.org/W1921293667","https://openalex.org/W2017489100","https://openalex.org/W2026844232","https://openalex.org/W2108598243","https://openalex.org/W2112796928","https://openalex.org/W2212290806","https://openalex.org/W2293432512","https://openalex.org/W2407776548","https://openalex.org/W2565166462","https://openalex.org/W2592335154","https://openalex.org/W2799269579","https://openalex.org/W2891555348","https://openalex.org/W2896457183","https://openalex.org/W2963026768","https://openalex.org/W2963118869","https://openalex.org/W2963153906","https://openalex.org/W2963341956","https://openalex.org/W2963403868","https://openalex.org/W2963413667","https://openalex.org/W2963703197","https://openalex.org/W2964121744","https://openalex.org/W2970854433","https://openalex.org/W2979826702","https://openalex.org/W4299579390","https://openalex.org/W4385245566","https://openalex.org/W6631190155","https://openalex.org/W6637845829","https://openalex.org/W6640298173","https://openalex.org/W6713582272","https://openalex.org/W6739901393","https://openalex.org/W6755207826","https://openalex.org/W6765510844","https://openalex.org/W6769243733"],"related_works":["https://openalex.org/W2032233321","https://openalex.org/W3121970507","https://openalex.org/W2110028391","https://openalex.org/W54497855","https://openalex.org/W217960748","https://openalex.org/W3125814499","https://openalex.org/W2090827041","https://openalex.org/W2094012830","https://openalex.org/W187246281","https://openalex.org/W2079194830"],"abstract_inverted_index":{"Understanding":[0],"the":[1,7,10,34,39,73,116],"semantic":[2],"meaning":[3],"of":[4,12,43,75,118],"content":[5,23,58],"on":[6,46],"web":[8],"through":[9],"lens":[11],"a":[13,57,92],"taxonomy":[14],"has":[15],"many":[16],"practical":[17],"advantages.":[18],"However,":[19],"when":[20],"building":[21,56],"large-scale":[22],"classification":[24,59,105],"systems,":[25],"practitioners":[26],"are":[27],"faced":[28],"with":[29],"unique":[30],"challenges":[31],"involving":[32],"finding":[33],"best":[35],"ways":[36],"to":[37,100],"leverage":[38],"scale":[40],"and":[41,78,95],"variety":[42],"data":[44],"available":[45],"internet":[47],"platforms.":[48],"We":[49,70,82,107],"present":[50],"learnings":[51],"from":[52],"our":[53],"efforts":[54],"in":[55,103,120],"system":[60],"for":[61,86],"multiple":[62],"document":[63],"types":[64],"at":[65],"Facebook":[66],"using":[67],"Multi-modal":[68],"Transformers.":[69],"empirically":[71],"demonstrate":[72],"effectiveness":[74],"multi-lingual,":[76],"multi-modal":[77],"cross-document":[79],"type":[80],"learning.":[81],"describe":[83],"effective":[84],"strategies":[85],"exploiting":[87],"weakly":[88],"supervised":[89],"signals":[90],"as":[91],"pre-training":[93],"step":[94],"show":[96],"that":[97,113],"they":[98],"lead":[99],"significant":[101],"gains":[102],"downstream":[104],"accuracy.":[106],"also":[108],"discuss":[109],"label":[110],"collection":[111],"schemes":[112],"help":[114],"minimize":[115],"amount":[117],"noise":[119],"collected":[121],"data.":[122]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
