{"id":"https://openalex.org/W3010832370","doi":"https://doi.org/10.1017/s1351324920000108","title":"Fine-grained analysis of language varieties and demographics","display_name":"Fine-grained analysis of language varieties and demographics","publication_year":2020,"publication_date":"2020-03-10","ids":{"openalex":"https://openalex.org/W3010832370","doi":"https://doi.org/10.1017/s1351324920000108","mag":"3010832370"},"language":"en","primary_location":{"id":"doi:10.1017/s1351324920000108","is_oa":true,"landing_page_url":"https://doi.org/10.1017/s1351324920000108","pdf_url":"https://www.cambridge.org/core/services/aop-cambridge-core/content/view/3BABF08F02CAA8AB6BED831F7DCA3561/S1351324920000108a.pdf/div-class-title-fine-grained-analysis-of-language-varieties-and-demographics-div.pdf","source":{"id":"https://openalex.org/S18088403","display_name":"Natural Language Engineering","issn_l":"1351-3249","issn":["1351-3249","1469-8110"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311721","host_organization_name":"Cambridge University Press","host_organization_lineage":["https://openalex.org/P4310311721","https://openalex.org/P4310311702"],"host_organization_lineage_names":["Cambridge University Press","University of Cambridge"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Natural Language Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://www.cambridge.org/core/services/aop-cambridge-core/content/view/3BABF08F02CAA8AB6BED831F7DCA3561/S1351324920000108a.pdf/div-class-title-fine-grained-analysis-of-language-varieties-and-demographics-div.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5035051966","display_name":"Francisco Rangel","orcid":"https://orcid.org/0000-0002-6583-3682"},"institutions":[{"id":"https://openalex.org/I60053951","display_name":"Universitat Polit\u00e8cnica de Val\u00e8ncia","ror":"https://ror.org/01460j859","country_code":"ES","type":"education","lineage":["https://openalex.org/I60053951"]}],"countries":["ES"],"is_corresponding":true,"raw_author_name":"Francisco Rangel","raw_affiliation_strings":["Pattern Recognition and Human Language Technologies, Universitat Polit\u00e8cnica de Val\u00e8ncia, Spain"],"affiliations":[{"raw_affiliation_string":"Pattern Recognition and Human Language Technologies, Universitat Polit\u00e8cnica de Val\u00e8ncia, Spain","institution_ids":["https://openalex.org/I60053951"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053947754","display_name":"Paolo Rosso","orcid":"https://orcid.org/0000-0002-8922-1242"},"institutions":[{"id":"https://openalex.org/I60053951","display_name":"Universitat Polit\u00e8cnica de Val\u00e8ncia","ror":"https://ror.org/01460j859","country_code":"ES","type":"education","lineage":["https://openalex.org/I60053951"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Paolo Rosso","raw_affiliation_strings":["Pattern Recognition and Human Language Technologies, Universitat Polit\u00e8cnica de Val\u00e8ncia, Spain"],"affiliations":[{"raw_affiliation_string":"Pattern Recognition and Human Language Technologies, Universitat Polit\u00e8cnica de Val\u00e8ncia, Spain","institution_ids":["https://openalex.org/I60053951"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047390057","display_name":"Wajdi Zaghouani","orcid":"https://orcid.org/0000-0003-1521-5568"},"institutions":[{"id":"https://openalex.org/I4210144839","display_name":"Hamad bin Khalifa University","ror":"https://ror.org/03eyq4y97","country_code":"QA","type":"education","lineage":["https://openalex.org/I4210144839"]}],"countries":["QA"],"is_corresponding":false,"raw_author_name":"Wajdi Zaghouani","raw_affiliation_strings":["College of Humanities and Social Sciences, Hamad Bin Khalifa University, Ar-Rayyan, Qatar"],"affiliations":[{"raw_affiliation_string":"College of Humanities and Social Sciences, Hamad Bin Khalifa University, Ar-Rayyan, Qatar","institution_ids":["https://openalex.org/I4210144839"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5029753508","display_name":"Anis Charfi","orcid":null},"institutions":[{"id":"https://openalex.org/I4210089979","display_name":"Carnegie Mellon University Qatar","ror":"https://ror.org/00az5dt38","country_code":"QA","type":"education","lineage":["https://openalex.org/I4210089979","https://openalex.org/I74973139"]}],"countries":["QA"],"is_corresponding":false,"raw_author_name":"Anis Charfi","raw_affiliation_strings":["Information Systems Program, Carnegie Mellon University in Qatar, Ar-Rayyan, Qatar"],"affiliations":[{"raw_affiliation_string":"Information Systems Program, Carnegie Mellon University in Qatar, Ar-Rayyan, Qatar","institution_ids":["https://openalex.org/I4210089979"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5035051966"],"corresponding_institution_ids":["https://openalex.org/I60053951"],"apc_list":null,"apc_paid":null,"fwci":1.9031,"has_fulltext":true,"cited_by_count":16,"citation_normalized_percentile":{"value":0.88673463,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":"26","issue":"6","first_page":"641","last_page":"661"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9923999905586243,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7960845232009888},{"id":"https://openalex.org/keywords/anonymity","display_name":"Anonymity","score":0.581909716129303},{"id":"https://openalex.org/keywords/demographics","display_name":"Demographics","score":0.5626348853111267},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.506468653678894},{"id":"https://openalex.org/keywords/offensive","display_name":"Offensive","score":0.4585326910018921},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4241040349006653},{"id":"https://openalex.org/keywords/turkish","display_name":"Turkish","score":0.4235611855983734},{"id":"https://openalex.org/keywords/hatred","display_name":"Hatred","score":0.4182721972465515},{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.4133455753326416},{"id":"https://openalex.org/keywords/arabic","display_name":"Arabic","score":0.4105044901371002},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.37240445613861084},{"id":"https://openalex.org/keywords/internet-privacy","display_name":"Internet privacy","score":0.3654027581214905},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.3644682765007019},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.15005654096603394},{"id":"https://openalex.org/keywords/political-science","display_name":"Political science","score":0.14955708384513855},{"id":"https://openalex.org/keywords/law","display_name":"Law","score":0.13732954859733582}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7960845232009888},{"id":"https://openalex.org/C178005623","wikidata":"https://www.wikidata.org/wiki/Q308859","display_name":"Anonymity","level":2,"score":0.581909716129303},{"id":"https://openalex.org/C2780084366","wikidata":"https://www.wikidata.org/wiki/Q37732","display_name":"Demographics","level":2,"score":0.5626348853111267},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.506468653678894},{"id":"https://openalex.org/C176856949","wikidata":"https://www.wikidata.org/wiki/Q2001676","display_name":"Offensive","level":2,"score":0.4585326910018921},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4241040349006653},{"id":"https://openalex.org/C2781121862","wikidata":"https://www.wikidata.org/wiki/Q256","display_name":"Turkish","level":2,"score":0.4235611855983734},{"id":"https://openalex.org/C2780617971","wikidata":"https://www.wikidata.org/wiki/Q160232","display_name":"Hatred","level":3,"score":0.4182721972465515},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.4133455753326416},{"id":"https://openalex.org/C96455323","wikidata":"https://www.wikidata.org/wiki/Q13955","display_name":"Arabic","level":2,"score":0.4105044901371002},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.37240445613861084},{"id":"https://openalex.org/C108827166","wikidata":"https://www.wikidata.org/wiki/Q175975","display_name":"Internet privacy","level":1,"score":0.3654027581214905},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.3644682765007019},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.15005654096603394},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.14955708384513855},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.13732954859733582},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C149923435","wikidata":"https://www.wikidata.org/wiki/Q37732","display_name":"Demography","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1017/s1351324920000108","is_oa":true,"landing_page_url":"https://doi.org/10.1017/s1351324920000108","pdf_url":"https://www.cambridge.org/core/services/aop-cambridge-core/content/view/3BABF08F02CAA8AB6BED831F7DCA3561/S1351324920000108a.pdf/div-class-title-fine-grained-analysis-of-language-varieties-and-demographics-div.pdf","source":{"id":"https://openalex.org/S18088403","display_name":"Natural Language Engineering","issn_l":"1351-3249","issn":["1351-3249","1469-8110"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311721","host_organization_name":"Cambridge University Press","host_organization_lineage":["https://openalex.org/P4310311721","https://openalex.org/P4310311702"],"host_organization_lineage_names":["Cambridge University Press","University of Cambridge"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Natural Language Engineering","raw_type":"journal-article"},{"id":"pmh:oai:riunet.upv.es:10251/166834","is_oa":true,"landing_page_url":"http://hdl.handle.net/10251/166834","pdf_url":null,"source":{"id":"https://openalex.org/S4306401500","display_name":"RiuNet (Politechnical University of Valencia)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I60053951","host_organization_name":"Universitat Polit\u00e8cnica de Val\u00e8ncia","host_organization_lineage":["https://openalex.org/I60053951"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/article"}],"best_oa_location":{"id":"doi:10.1017/s1351324920000108","is_oa":true,"landing_page_url":"https://doi.org/10.1017/s1351324920000108","pdf_url":"https://www.cambridge.org/core/services/aop-cambridge-core/content/view/3BABF08F02CAA8AB6BED831F7DCA3561/S1351324920000108a.pdf/div-class-title-fine-grained-analysis-of-language-varieties-and-demographics-div.pdf","source":{"id":"https://openalex.org/S18088403","display_name":"Natural Language Engineering","issn_l":"1351-3249","issn":["1351-3249","1469-8110"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311721","host_organization_name":"Cambridge University Press","host_organization_lineage":["https://openalex.org/P4310311721","https://openalex.org/P4310311702"],"host_organization_lineage_names":["Cambridge University Press","University of Cambridge"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Natural Language Engineering","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/5","score":0.46000000834465027,"display_name":"Gender equality"},{"id":"https://metadata.un.org/sdg/16","score":0.4099999964237213,"display_name":"Peace, Justice and strong institutions"}],"awards":[{"id":"https://openalex.org/G4122008771","display_name":null,"funder_award_id":"9-175-1-033","funder_id":"https://openalex.org/F4320332753","funder_display_name":"Qatar National Research Fund"}],"funders":[{"id":"https://openalex.org/F4320309815","display_name":"Qatar Foundation","ror":"https://ror.org/01cawbq05"},{"id":"https://openalex.org/F4320321038","display_name":"Fonds National de la Recherche Luxembourg","ror":"https://ror.org/039z13y21"},{"id":"https://openalex.org/F4320332753","display_name":"Qatar National Research Fund","ror":"https://ror.org/01svaqq28"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3010832370.pdf","grobid_xml":"https://content.openalex.org/works/W3010832370.grobid-xml"},"referenced_works_count":42,"referenced_works":["https://openalex.org/W395611350","https://openalex.org/W832361413","https://openalex.org/W968157591","https://openalex.org/W1492056975","https://openalex.org/W1761765343","https://openalex.org/W1978394996","https://openalex.org/W1979504415","https://openalex.org/W1993274894","https://openalex.org/W2001461029","https://openalex.org/W2023513308","https://openalex.org/W2026663304","https://openalex.org/W2084413241","https://openalex.org/W2129594729","https://openalex.org/W2136050570","https://openalex.org/W2140432583","https://openalex.org/W2147272182","https://openalex.org/W2160802179","https://openalex.org/W2162019804","https://openalex.org/W2250174871","https://openalex.org/W2251433042","https://openalex.org/W2251986002","https://openalex.org/W2252025714","https://openalex.org/W2553512497","https://openalex.org/W2561747913","https://openalex.org/W2584841643","https://openalex.org/W2618204048","https://openalex.org/W2620806258","https://openalex.org/W2752530998","https://openalex.org/W2752878932","https://openalex.org/W2798160434","https://openalex.org/W2806092253","https://openalex.org/W2888364742","https://openalex.org/W2889057516","https://openalex.org/W2889194293","https://openalex.org/W2889395474","https://openalex.org/W2962834862","https://openalex.org/W4256087959","https://openalex.org/W6657048213","https://openalex.org/W6691208988","https://openalex.org/W6691363017","https://openalex.org/W6729896516","https://openalex.org/W6731542071"],"related_works":["https://openalex.org/W2291890613","https://openalex.org/W2796585648","https://openalex.org/W2095697673","https://openalex.org/W2978739876","https://openalex.org/W2267650038","https://openalex.org/W2338899392","https://openalex.org/W4313315573","https://openalex.org/W2236103065","https://openalex.org/W2113854174","https://openalex.org/W3116082493"],"abstract_inverted_index":{"Abstract":[0],"The":[1,18],"rise":[2],"of":[3,20,28,91,115,136,153,174,186,194,203,213,220],"social":[4],"media":[5],"empowers":[6],"people":[7],"to":[8,35,45,58,107,168],"interact":[9],"and":[10,25,65,75,181,198],"communicate":[11],"with":[12,118,158],"anyone":[13],"anywhere":[14],"in":[15,70,123],"the":[16,62,97,113,119,124,141,151,154,159,192,195,201,204,211,214,218],"world.":[17],"possibility":[19],"being":[21],"anonymous":[22,63],"avoids":[23],"censorship":[24],"enables":[26],"freedom":[27],"expression.":[29],"Nevertheless,":[30],"this":[31,66,83,116],"anonymity":[32],"might":[33],"lead":[34],"cybersecurity":[36],"issues,":[37],"such":[38,52,77],"as":[39,78,208,210],"opinion":[40],"spam,":[41],"sexual":[42],"harassment,":[43],"incitement":[44],"hatred":[46],"or":[47],"even":[48],"terrorism":[49],"propaganda.":[50],"In":[51,82],"cases,":[53],"there":[54],"is":[55],"a":[56,88,102,169],"need":[57],"know":[59],"more":[60,170],"about":[61],"users":[64],"could":[67],"be":[68],"useful":[69],"several":[71],"domains":[72],"beyond":[73],"security":[74],"forensics":[76],"marketing,":[79],"for":[80,140],"example.":[81],"paper,":[84],"we":[85,163],"focus":[86],"on":[87,200,217],"fine-grained":[89,171],"analysis":[90],"language":[92,155],"varieties":[93,176],"while":[94],"considering":[95],"also":[96,149,190],"authors\u2019":[98,160,196],"demographics.":[99],"We":[100,111,131,148,188],"present":[101],"Low-Dimensionality":[103],"Statistical":[104],"Embedding":[105],"method":[106,117,167],"represent":[108],"text":[109],"documents.":[110],"compared":[112],"performance":[114,219],"best":[120,142],"performing":[121,143],"teams":[122],"Author":[125],"Profiling":[126],"task":[127],"at":[128,145],"PAN":[129,146],"2017.":[130,147],"obtained":[132,182],"an":[133,183],"average":[134],"accuracy":[135,185],"92.08%":[137],"versus":[138],"91.84%":[139],"team":[144],"analyse":[150],"relationship":[152],"variety":[156],"identification":[157,202],"gender.":[161],"Furthermore,":[162],"applied":[164],"our":[165,221],"proposed":[166],"annotated":[172],"corpus":[173,215],"Arabic":[175,206],"covering":[177],"22":[178],"Arab":[179],"countries":[180],"overall":[184],"88.89%.":[187],"have":[189],"investigated":[191],"effect":[193,212],"age":[197],"gender":[199],"different":[205],"varieties,":[207],"well":[209],"size":[216],"method.":[222]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":5},{"year":2021,"cited_by_count":6},{"year":2020,"cited_by_count":1}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
