{"id":"https://openalex.org/W2889055913","doi":"https://doi.org/10.18653/v1/d18-1148","title":"The Remarkable Benefit of User-Level Aggregation for Lexical-based Population-Level Predictions","display_name":"The Remarkable Benefit of User-Level Aggregation for Lexical-based Population-Level Predictions","publication_year":2018,"publication_date":"2018-01-01","ids":{"openalex":"https://openalex.org/W2889055913","doi":"https://doi.org/10.18653/v1/d18-1148","mag":"2889055913"},"language":"en","primary_location":{"id":"doi:10.18653/v1/d18-1148","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/d18-1148","pdf_url":"https://www.aclweb.org/anthology/D18-1148.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.aclweb.org/anthology/D18-1148.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5029821479","display_name":"Salvatore Giorgi","orcid":"https://orcid.org/0000-0001-7381-6295"},"institutions":[{"id":"https://openalex.org/I79576946","display_name":"University of Pennsylvania","ror":"https://ror.org/00b30xv10","country_code":"US","type":"education","lineage":["https://openalex.org/I79576946"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Salvatore Giorgi","raw_affiliation_strings":["Department of Psychology, University of Pennsylvania"],"affiliations":[{"raw_affiliation_string":"Department of Psychology, University of Pennsylvania","institution_ids":["https://openalex.org/I79576946"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086173474","display_name":"Daniel Preo\u021biuc-Pietro","orcid":"https://orcid.org/0000-0002-4504-0212"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Daniel Preo\u0163iuc-Pietro","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012554861","display_name":"Anneke Buffone","orcid":null},"institutions":[{"id":"https://openalex.org/I79576946","display_name":"University of Pennsylvania","ror":"https://ror.org/00b30xv10","country_code":"US","type":"education","lineage":["https://openalex.org/I79576946"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Anneke Buffone","raw_affiliation_strings":["Department of Psychology, University of Pennsylvania"],"affiliations":[{"raw_affiliation_string":"Department of Psychology, University of Pennsylvania","institution_ids":["https://openalex.org/I79576946"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086739784","display_name":"Daniel Rieman","orcid":null},"institutions":[{"id":"https://openalex.org/I79576946","display_name":"University of Pennsylvania","ror":"https://ror.org/00b30xv10","country_code":"US","type":"education","lineage":["https://openalex.org/I79576946"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Daniel Rieman","raw_affiliation_strings":["Department of Psychology, University of Pennsylvania"],"affiliations":[{"raw_affiliation_string":"Department of Psychology, University of Pennsylvania","institution_ids":["https://openalex.org/I79576946"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044944954","display_name":"Lyle Ungar","orcid":"https://orcid.org/0000-0003-2047-1443"},"institutions":[{"id":"https://openalex.org/I79576946","display_name":"University of Pennsylvania","ror":"https://ror.org/00b30xv10","country_code":"US","type":"education","lineage":["https://openalex.org/I79576946"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Lyle Ungar","raw_affiliation_strings":["Computer and Information Science, University of Pennsylvania"],"affiliations":[{"raw_affiliation_string":"Computer and Information Science, University of Pennsylvania","institution_ids":["https://openalex.org/I79576946"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5046253607","display_name":"H. Andrew Schwartz","orcid":"https://orcid.org/0000-0002-6383-3339"},"institutions":[{"id":"https://openalex.org/I59553526","display_name":"Stony Brook University","ror":"https://ror.org/05qghxh33","country_code":"US","type":"education","lineage":["https://openalex.org/I59553526"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"H. Andrew Schwartz","raw_affiliation_strings":["Computer Science, Stony Brook University"],"affiliations":[{"raw_affiliation_string":"Computer Science, Stony Brook University","institution_ids":["https://openalex.org/I59553526"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5029821479"],"corresponding_institution_ids":["https://openalex.org/I79576946"],"apc_list":null,"apc_paid":null,"fwci":4.7662,"has_fulltext":true,"cited_by_count":11,"citation_normalized_percentile":{"value":0.95013732,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1167","last_page":"1172"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11147","display_name":"Misinformation and Its Impacts","score":0.9156000018119812,"subfield":{"id":"https://openalex.org/subfields/3312","display_name":"Sociology and Political Science"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11147","display_name":"Misinformation and Its Impacts","score":0.9156000018119812,"subfield":{"id":"https://openalex.org/subfields/3312","display_name":"Sociology and Political Science"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12488","display_name":"Mental Health via Writing","score":0.9057000279426575,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10737","display_name":"Health Literacy and Information Accessibility","score":0.9035000205039978,"subfield":{"id":"https://openalex.org/subfields/3600","display_name":"General Health Professions"},"field":{"id":"https://openalex.org/fields/36","display_name":"Health Professions"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/nowcasting","display_name":"Nowcasting","score":0.865270733833313},{"id":"https://openalex.org/keywords/social-media","display_name":"Social media","score":0.664785623550415},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5686134696006775},{"id":"https://openalex.org/keywords/population","display_name":"Population","score":0.43098652362823486},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.3642948865890503},{"id":"https://openalex.org/keywords/econometrics","display_name":"Econometrics","score":0.3465147316455841},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.32154136896133423},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.2297217845916748},{"id":"https://openalex.org/keywords/sociology","display_name":"Sociology","score":0.19059231877326965},{"id":"https://openalex.org/keywords/demography","display_name":"Demography","score":0.15913838148117065},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1250375211238861}],"concepts":[{"id":"https://openalex.org/C2781013037","wikidata":"https://www.wikidata.org/wiki/Q1433331","display_name":"Nowcasting","level":2,"score":0.865270733833313},{"id":"https://openalex.org/C518677369","wikidata":"https://www.wikidata.org/wiki/Q202833","display_name":"Social media","level":2,"score":0.664785623550415},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5686134696006775},{"id":"https://openalex.org/C2908647359","wikidata":"https://www.wikidata.org/wiki/Q2625603","display_name":"Population","level":2,"score":0.43098652362823486},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3642948865890503},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.3465147316455841},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.32154136896133423},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.2297217845916748},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.19059231877326965},{"id":"https://openalex.org/C149923435","wikidata":"https://www.wikidata.org/wiki/Q37732","display_name":"Demography","level":1,"score":0.15913838148117065},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1250375211238861},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.18653/v1/d18-1148","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/d18-1148","pdf_url":"https://www.aclweb.org/anthology/D18-1148.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1808.09600","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1808.09600","pdf_url":"https://arxiv.org/pdf/1808.09600","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:2889055913","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/1808.09600v1","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.1808.09600","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1808.09600","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.18653/v1/d18-1148","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/d18-1148","pdf_url":"https://www.aclweb.org/anthology/D18-1148.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"No poverty","id":"https://metadata.un.org/sdg/1","score":0.6299999952316284}],"awards":[{"id":"https://openalex.org/G1119886921","display_name":null,"funder_award_id":"TRT0048","funder_id":"https://openalex.org/F4320327997","funder_display_name":"Templeton Religion Trust"},{"id":"https://openalex.org/G3658061967","display_name":null,"funder_award_id":"#TRT0048","funder_id":"https://openalex.org/F4320327997","funder_display_name":"Templeton Religion Trust"}],"funders":[{"id":"https://openalex.org/F4320327997","display_name":"Templeton Religion Trust","ror":"https://ror.org/02q53mk25"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2889055913.pdf","grobid_xml":"https://content.openalex.org/works/W2889055913.grobid-xml"},"referenced_works_count":1,"referenced_works":["https://openalex.org/W2169200297"],"related_works":["https://openalex.org/W2771849569","https://openalex.org/W2765445004","https://openalex.org/W2608747192","https://openalex.org/W2251094674","https://openalex.org/W2584161741","https://openalex.org/W2625408790","https://openalex.org/W3104127453","https://openalex.org/W2250747954","https://openalex.org/W2739746146","https://openalex.org/W2884787651","https://openalex.org/W2606332827","https://openalex.org/W2950157361","https://openalex.org/W2963296228","https://openalex.org/W3173459173","https://openalex.org/W2285361963","https://openalex.org/W41368942","https://openalex.org/W2772726170","https://openalex.org/W2433115358","https://openalex.org/W2603990908","https://openalex.org/W3110058997"],"abstract_inverted_index":{"Nowcasting":[0],"based":[1],"on":[2,59],"social":[3],"media":[4],"text":[5],"promises":[6],"to":[7,31,85,95,129],"provide":[8],"unobtrusive":[9],"and":[10,68,73,113],"near":[11],"real-time":[12],"predictions":[13],"of":[14,37,105,125],"community-level":[15,50,115],"outcomes.":[16],"These":[17],"outcomes":[18,70],"are":[19],"typically":[20],"regarding":[21],"people,":[22],"but":[23],"the":[24,34,102],"data":[25],"is":[26],"often":[27],"aggregated":[28,55,112],"without":[29],"regard":[30],"users":[32],"in":[33,76],"Twitter":[35,53],"populations":[36],"each":[38],"community.":[39],"This":[40],"paper":[41],"describes":[42],"a":[43],"simple":[44],"yet":[45],"effective":[46],"method":[47],"for":[48,87,97,132],"building":[49],"models":[51],"using":[52],"language":[54],"by":[56],"user.":[57],"Results":[58],"four":[60],"different":[61],"U.S.":[62],"county-level":[63],"tasks,":[64],"spanning":[65],"demographic,":[66],"health,":[67],"psychological":[69],"show":[71],"large":[72],"consistent":[74],"improvements":[75],"prediction":[77,90],"accuracies":[78],"(e.g.":[79],"from":[80,118],"Pearson":[81],"r":[82,92],"=":[83,93],".73":[84],".82":[86],"median":[88],"income":[89],"or":[91],".37":[94],".47":[96],"life":[98],"satisfaction":[99],"prediction)":[100],"over":[101],"standard":[103],"approach":[104],"aggregating":[106],"all":[107],"tweets.":[108],"We":[109],"make":[110],"our":[111],"anonymized":[114],"data,":[116],"derived":[117],"37":[119],"billion":[120,124],"tweets":[121],"-over":[122],"1":[123],"which":[126],"were":[127],"mapped":[128],"counties,":[130],"available":[131],"research.":[133]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":5},{"year":2019,"cited_by_count":1}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
