{"id":"https://openalex.org/W4385567868","doi":"https://doi.org/10.1145/3580305.3599921","title":"TwHIN-BERT: A Socially-Enriched Pre-trained Language Model for Multilingual Tweet Representations at Twitter","display_name":"TwHIN-BERT: A Socially-Enriched Pre-trained Language Model for Multilingual Tweet Representations at Twitter","publication_year":2023,"publication_date":"2023-08-04","ids":{"openalex":"https://openalex.org/W4385567868","doi":"https://doi.org/10.1145/3580305.3599921"},"language":"en","primary_location":{"id":"doi:10.1145/3580305.3599921","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3580305.3599921","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5022999126","display_name":"Xinyang Zhang","orcid":"https://orcid.org/0000-0001-6474-682X"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Xinyang Zhang","raw_affiliation_strings":["The University of Illinois at Urbana-Champaign, Urbana, IL, USA"],"affiliations":[{"raw_affiliation_string":"The University of Illinois at Urbana-Champaign, Urbana, IL, USA","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015209115","display_name":"Yury Malkov","orcid":"https://orcid.org/0000-0003-4324-6433"},"institutions":[{"id":"https://openalex.org/I113979032","display_name":"Twitter (United States)","ror":"https://ror.org/04wt43v05","country_code":"US","type":"company","lineage":["https://openalex.org/I113979032"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yury Malkov","raw_affiliation_strings":["Twitter Cortex, San Francisco, CA, USA"],"affiliations":[{"raw_affiliation_string":"Twitter Cortex, San Francisco, CA, USA","institution_ids":["https://openalex.org/I113979032"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047959451","display_name":"Omar U. Florez","orcid":"https://orcid.org/0009-0008-7884-8825"},"institutions":[{"id":"https://openalex.org/I113979032","display_name":"Twitter (United States)","ror":"https://ror.org/04wt43v05","country_code":"US","type":"company","lineage":["https://openalex.org/I113979032"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Omar Florez","raw_affiliation_strings":["Twitter Cortex, San Francisco, CA, USA"],"affiliations":[{"raw_affiliation_string":"Twitter Cortex, San Francisco, CA, USA","institution_ids":["https://openalex.org/I113979032"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007855897","display_name":"Serim Park","orcid":"https://orcid.org/0009-0004-0131-245X"},"institutions":[{"id":"https://openalex.org/I113979032","display_name":"Twitter (United States)","ror":"https://ror.org/04wt43v05","country_code":"US","type":"company","lineage":["https://openalex.org/I113979032"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Serim Park","raw_affiliation_strings":["Twitter Cortex, San Francisco, CA, USA"],"affiliations":[{"raw_affiliation_string":"Twitter Cortex, San Francisco, CA, USA","institution_ids":["https://openalex.org/I113979032"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041664883","display_name":"Brian McWilliams","orcid":"https://orcid.org/0009-0002-7433-1702"},"institutions":[{"id":"https://openalex.org/I113979032","display_name":"Twitter (United States)","ror":"https://ror.org/04wt43v05","country_code":"US","type":"company","lineage":["https://openalex.org/I113979032"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Brian McWilliams","raw_affiliation_strings":["Twitter Cortex, San Francisco, CA, USA"],"affiliations":[{"raw_affiliation_string":"Twitter Cortex, San Francisco, CA, USA","institution_ids":["https://openalex.org/I113979032"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019539533","display_name":"Jiawei Han","orcid":"https://orcid.org/0000-0002-3629-2696"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jiawei Han","raw_affiliation_strings":["The University of Illinois at Urbana-Champaign, Urbana, IL, USA"],"affiliations":[{"raw_affiliation_string":"The University of Illinois at Urbana-Champaign, Urbana, IL, USA","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5035119055","display_name":"Ahmed El-Kishky","orcid":"https://orcid.org/0000-0003-0121-7781"},"institutions":[{"id":"https://openalex.org/I113979032","display_name":"Twitter (United States)","ror":"https://ror.org/04wt43v05","country_code":"US","type":"company","lineage":["https://openalex.org/I113979032"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ahmed El-Kishky","raw_affiliation_strings":["Twitter Cortex, San Francisco, CA, USA"],"affiliations":[{"raw_affiliation_string":"Twitter Cortex, San Francisco, CA, USA","institution_ids":["https://openalex.org/I113979032"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5022999126"],"corresponding_institution_ids":["https://openalex.org/I157725225"],"apc_list":null,"apc_paid":null,"fwci":8.3503,"has_fulltext":false,"cited_by_count":48,"citation_normalized_percentile":{"value":0.98244375,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"5597","last_page":"5607"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.9944000244140625,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8371737003326416},{"id":"https://openalex.org/keywords/social-media","display_name":"Social media","score":0.6708755493164062},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.6381610035896301},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6321801543235779},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5958028435707092},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5863213539123535},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.5795860886573792},{"id":"https://openalex.org/keywords/social-network","display_name":"Social network (sociolinguistics)","score":0.5462157726287842},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.4564078748226166},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.38825690746307373},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.344418466091156}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8371737003326416},{"id":"https://openalex.org/C518677369","wikidata":"https://www.wikidata.org/wiki/Q202833","display_name":"Social media","level":2,"score":0.6708755493164062},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.6381610035896301},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6321801543235779},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5958028435707092},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5863213539123535},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.5795860886573792},{"id":"https://openalex.org/C4727928","wikidata":"https://www.wikidata.org/wiki/Q17164759","display_name":"Social network (sociolinguistics)","level":3,"score":0.5462157726287842},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.4564078748226166},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.38825690746307373},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.344418466091156},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3580305.3599921","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3580305.3599921","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.6800000071525574}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":38,"referenced_works":["https://openalex.org/W1888005072","https://openalex.org/W1996263819","https://openalex.org/W2062797058","https://openalex.org/W2075010670","https://openalex.org/W2124509324","https://openalex.org/W2145658888","https://openalex.org/W2154851992","https://openalex.org/W2493916176","https://openalex.org/W2577283662","https://openalex.org/W2584620251","https://openalex.org/W2743104969","https://openalex.org/W2747329762","https://openalex.org/W2759136286","https://openalex.org/W2806198715","https://openalex.org/W2807021761","https://openalex.org/W2946328221","https://openalex.org/W2953356739","https://openalex.org/W2962739339","https://openalex.org/W2962756421","https://openalex.org/W2981852735","https://openalex.org/W2998702515","https://openalex.org/W3002293096","https://openalex.org/W3035390927","https://openalex.org/W3100848837","https://openalex.org/W3104097132","https://openalex.org/W3104186312","https://openalex.org/W3104249938","https://openalex.org/W3104987177","https://openalex.org/W3115081393","https://openalex.org/W3169483174","https://openalex.org/W3173954987","https://openalex.org/W4221153690","https://openalex.org/W4246649926","https://openalex.org/W4285190530","https://openalex.org/W4288089799","https://openalex.org/W4290875442","https://openalex.org/W4290927951","https://openalex.org/W4290943549"],"related_works":["https://openalex.org/W2378211422","https://openalex.org/W4321353415","https://openalex.org/W2745001401","https://openalex.org/W2130974462","https://openalex.org/W2028665553","https://openalex.org/W2086519370","https://openalex.org/W4246352526","https://openalex.org/W1482441085","https://openalex.org/W2966858528","https://openalex.org/W2151687600"],"abstract_inverted_index":{"Pre-trained":[0],"language":[1,8,47,66,140],"models":[2,67],"(PLMs)":[3],"are":[4,14],"fundamental":[5],"for":[6],"natural":[7],"processing":[9],"applications.":[10],"Most":[11],"existing":[12],"PLMs":[13],"not":[15,29,73],"tailored":[16],"to":[17,113,155],"the":[18,26,32,57,85,156],"noisy":[19],"user-generated":[20,117],"text":[21],"on":[22,53,84,100,123],"social":[23,34,40,59,81,87,126,151],"media,":[24],"and":[25,128,132,145,150],"pre-training":[27],"does":[28],"factor":[30],"in":[31,38],"valuable":[33,111],"engagement":[35,152],"logs":[36],"available":[37],"a":[39,45,80,90,110],"network.":[41,60],"We":[42,119,142],"present":[43],"TwHIN-BERT,":[44],"multilingual":[46,125],"model":[48,97,114,122],"productionized":[49],"at":[50],"Twitter,":[51],"trained":[52,71,99],"in-domain":[54],"data":[55],"from":[56,63],"popular":[58],"TwHIN-BERT":[61,144],"differs":[62],"prior":[64],"pre-trained":[65,139],"as":[68],"it":[69],"is":[70,98],"with":[72,79],"only":[74],"text-based":[75],"self-supervision":[76],"but":[77],"also":[78],"objective":[82],"based":[83],"rich":[86],"engagements":[88],"within":[89],"Twitter":[91],"heterogeneous":[92],"information":[93],"network":[94],"(TwHIN).":[95],"Our":[96],"7":[101],"billion":[102],"tweets":[103],"covering":[104],"over":[105,137],"100":[106],"distinct":[107],"languages,":[108],"providing":[109],"representation":[112],"short,":[115],"noisy,":[116],"text.":[118],"evaluate":[120],"our":[121,146],"various":[124],"recommendation":[127],"semantic":[129],"understanding":[130],"tasks":[131],"demonstrate":[133],"significant":[134],"metric":[135],"improvement":[136],"established":[138],"models.":[141],"open-source":[143],"curated":[147],"hashtag":[148],"prediction":[149],"benchmark":[153],"datasets":[154],"research":[157],"community.":[158]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":19},{"year":2024,"cited_by_count":23},{"year":2023,"cited_by_count":5}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
