{"id":"https://openalex.org/W7129398327","doi":"https://doi.org/10.48550/arxiv.2602.14819","title":"Testimole-Conversational: A 30-Billion-Word Italian Discussion Board Corpus (1996-2024) for Language Modeling and Sociolinguistic Research","display_name":"Testimole-Conversational: A 30-Billion-Word Italian Discussion Board Corpus (1996-2024) for Language Modeling and Sociolinguistic Research","publication_year":2026,"publication_date":"2026-02-16","ids":{"openalex":"https://openalex.org/W7129398327","doi":"https://doi.org/10.48550/arxiv.2602.14819"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.14819","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5126241894","display_name":"Matteo Rinaldi","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Rinaldi, Matteo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035475244","display_name":"Rossella Varvara","orcid":"https://orcid.org/0000-0001-9957-2807"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Varvara, Rossella","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5126197467","display_name":"Viviana Patti","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Patti, Viviana","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5126241894"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.26109999418258667,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.26109999418258667,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12090","display_name":"Language and cultural evolution","score":0.17319999635219574,"subfield":{"id":"https://openalex.org/subfields/3316","display_name":"Cultural Studies"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T13910","display_name":"Computational and Text Analysis Methods","score":0.10029999911785126,"subfield":{"id":"https://openalex.org/subfields/3300","display_name":"General Social Sciences"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.7304999828338623},{"id":"https://openalex.org/keywords/relevance","display_name":"Relevance (law)","score":0.6401000022888184},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.47119998931884766},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.4690000116825104},{"id":"https://openalex.org/keywords/variation","display_name":"Variation (astronomy)","score":0.4471000134944916},{"id":"https://openalex.org/keywords/corpus-linguistics","display_name":"Corpus linguistics","score":0.396699994802475},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.365200012922287},{"id":"https://openalex.org/keywords/ideal","display_name":"Ideal (ethics)","score":0.36059999465942383}],"concepts":[{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.7304999828338623},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.6704999804496765},{"id":"https://openalex.org/C158154518","wikidata":"https://www.wikidata.org/wiki/Q7310970","display_name":"Relevance (law)","level":2,"score":0.6401000022888184},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6014000177383423},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.47119998931884766},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.4690000116825104},{"id":"https://openalex.org/C2778334786","wikidata":"https://www.wikidata.org/wiki/Q1586270","display_name":"Variation (astronomy)","level":2,"score":0.4471000134944916},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.4413999915122986},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4106999933719635},{"id":"https://openalex.org/C532629269","wikidata":"https://www.wikidata.org/wiki/Q865083","display_name":"Corpus linguistics","level":2,"score":0.396699994802475},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3849000036716461},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.365200012922287},{"id":"https://openalex.org/C2776639384","wikidata":"https://www.wikidata.org/wiki/Q840396","display_name":"Ideal (ethics)","level":2,"score":0.36059999465942383},{"id":"https://openalex.org/C518677369","wikidata":"https://www.wikidata.org/wiki/Q202833","display_name":"Social media","level":2,"score":0.3465999960899353},{"id":"https://openalex.org/C2779313563","wikidata":"https://www.wikidata.org/wiki/Q17072565","display_name":"On Language","level":2,"score":0.3366999924182892},{"id":"https://openalex.org/C84389358","wikidata":"https://www.wikidata.org/wiki/Q1129466","display_name":"Discourse analysis","level":2,"score":0.32350000739097595},{"id":"https://openalex.org/C28519872","wikidata":"https://www.wikidata.org/wiki/Q160845","display_name":"Sociolinguistics","level":2,"score":0.3009999990463257},{"id":"https://openalex.org/C2987219923","wikidata":"https://www.wikidata.org/wiki/Q777864","display_name":"Linguistic analysis","level":2,"score":0.29789999127388},{"id":"https://openalex.org/C14919245","wikidata":"https://www.wikidata.org/wiki/Q1976109","display_name":"Language technology","level":4,"score":0.2955000102519989},{"id":"https://openalex.org/C155092808","wikidata":"https://www.wikidata.org/wiki/Q182557","display_name":"Computational linguistics","level":2,"score":0.2711000144481659},{"id":"https://openalex.org/C83479923","wikidata":"https://www.wikidata.org/wiki/Q2063748","display_name":"Universal Networking Language","level":4,"score":0.26989999413490295},{"id":"https://openalex.org/C59656382","wikidata":"https://www.wikidata.org/wiki/Q191536","display_name":"Conjunction (astronomy)","level":2,"score":0.2655999958515167}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.14819","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.14819","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.14819","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.14819","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.810352623462677,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0],"present":[1],"\"Testimole-conversational\"":[2],"a":[3,41,54],"massive":[4],"collection":[5],"of":[6,17,57,95],"discussion":[7,37],"boards":[8],"messages":[9,39],"in":[10,72,101],"the":[11,18,112],"Italian":[12,32],"language.":[13],"The":[14,51,104],"large":[15],"size":[16],"corpus,":[19],"more":[20],"than":[21],"30B":[22],"word-tokens":[23],"(1996-2024),":[24],"renders":[25],"it":[26,91],"an":[27],"ideal":[28],"dataset":[29],"for":[30,44,79],"native":[31],"Large":[33],"Language":[34],"Models'pre-training.":[35],"Furthermore,":[36],"boards'":[38],"are":[40],"relevant":[42],"resource":[43,105],"linguistic":[45],"as":[46,48,83],"well":[47],"sociological":[49],"analysis.":[50],"corpus":[52],"captures":[53],"rich":[55],"variety":[56],"computer-mediated":[58],"communication,":[59],"offering":[60],"insights":[61],"into":[62],"informal":[63],"written":[64],"Italian,":[65],"discourse":[66],"dynamics,":[67],"and":[68,88,98],"online":[69],"social":[70,99],"interaction":[71],"wide":[73],"time":[74],"span.":[75],"Beyond":[76],"its":[77],"relevance":[78],"NLP":[80],"applications":[81],"such":[82],"language":[84,96],"modelling,":[85],"domain":[86],"adaptation,":[87],"conversational":[89],"analysis,":[90],"also":[92],"support":[93],"investigations":[94],"variation":[97],"phenomena":[100],"digital":[102],"communication.":[103],"will":[106],"be":[107],"made":[108],"freely":[109],"available":[110],"to":[111],"research":[113],"community.":[114]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-02-18T00:00:00"}
