{"id":"https://openalex.org/W2553684291","doi":"https://doi.org/10.1093/llc/fqw049","title":"A digital corpus resource of authentic anonymized French text messages: 88milSMS\u2014What about transcoding and linguistic annotation?","display_name":"A digital corpus resource of authentic anonymized French text messages: 88milSMS\u2014What about transcoding and linguistic annotation?","publication_year":2016,"publication_date":"2016-11-17","ids":{"openalex":"https://openalex.org/W2553684291","doi":"https://doi.org/10.1093/llc/fqw049","mag":"2553684291"},"language":"en","primary_location":{"id":"doi:10.1093/llc/fqw049","is_oa":true,"landing_page_url":"https://doi.org/10.1093/llc/fqw049","pdf_url":"https://academic.oup.com/dsh/article-pdf/32/suppl_1/i92/17751451/fqw049.pdf","source":{"id":"https://openalex.org/S2734814886","display_name":"Digital Scholarship in the Humanities","issn_l":"2055-7671","issn":["2055-7671","2055-768X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311648","https://openalex.org/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Digital Scholarship in the Humanities","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://academic.oup.com/dsh/article-pdf/32/suppl_1/i92/17751451/fqw049.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5078207340","display_name":"Rachel Panckhurst","orcid":"https://orcid.org/0000-0003-1209-5804"},"institutions":[{"id":"https://openalex.org/I4210095130","display_name":"Universit\u00e9 Paul-Val\u00e9ry Montpellier","ror":null,"country_code":"FR","type":null,"lineage":["https://openalex.org/I4210095130"]},{"id":"https://openalex.org/I924572780","display_name":"Praxis","ror":"https://ror.org/00b8kjk28","country_code":"EE","type":"nonprofit","lineage":["https://openalex.org/I924572780"]}],"countries":["EE","FR"],"is_corresponding":true,"raw_author_name":"Rachel Panckhurst","raw_affiliation_strings":["Praxiling, Universit\u00e9 Paul-Val\u00e9ry Montpellier 3, France","Praxiling"],"affiliations":[{"raw_affiliation_string":"Praxiling, Universit\u00e9 Paul-Val\u00e9ry Montpellier 3, France","institution_ids":["https://openalex.org/I4210095130"]},{"raw_affiliation_string":"Praxiling","institution_ids":["https://openalex.org/I924572780"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5078207340"],"corresponding_institution_ids":["https://openalex.org/I4210095130","https://openalex.org/I924572780"],"apc_list":{"value":2522,"currency":"GBP","value_usd":3093},"apc_paid":{"value":2522,"currency":"GBP","value_usd":3093},"fwci":0.0,"has_fulltext":true,"cited_by_count":39,"citation_normalized_percentile":{"value":0.08054775,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"fqw049","last_page":"fqw049"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9835000038146973,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/transcoding","display_name":"Transcoding","score":0.8816613554954529},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.828522801399231},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.6157663464546204},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.5853106379508972},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5061468482017517},{"id":"https://openalex.org/keywords/corpus-linguistics","display_name":"Corpus linguistics","score":0.41180509328842163},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4068194031715393},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.33794698119163513},{"id":"https://openalex.org/keywords/philosophy","display_name":"Philosophy","score":0.09605392813682556}],"concepts":[{"id":"https://openalex.org/C134535813","wikidata":"https://www.wikidata.org/wiki/Q1888734","display_name":"Transcoding","level":2,"score":0.8816613554954529},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.828522801399231},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.6157663464546204},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.5853106379508972},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5061468482017517},{"id":"https://openalex.org/C532629269","wikidata":"https://www.wikidata.org/wiki/Q865083","display_name":"Corpus linguistics","level":2,"score":0.41180509328842163},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4068194031715393},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.33794698119163513},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.09605392813682556},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1093/llc/fqw049","is_oa":true,"landing_page_url":"https://doi.org/10.1093/llc/fqw049","pdf_url":"https://academic.oup.com/dsh/article-pdf/32/suppl_1/i92/17751451/fqw049.pdf","source":{"id":"https://openalex.org/S2734814886","display_name":"Digital Scholarship in the Humanities","issn_l":"2055-7671","issn":["2055-7671","2055-768X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311648","https://openalex.org/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Digital Scholarship in the Humanities","raw_type":"journal-article"},{"id":"pmh:oai:HAL:hal-01485567v1","is_oa":false,"landing_page_url":"https://hal.science/hal-01485567","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"https://academic.oup.com/dsh","raw_type":"Journal articles"}],"best_oa_location":{"id":"doi:10.1093/llc/fqw049","is_oa":true,"landing_page_url":"https://doi.org/10.1093/llc/fqw049","pdf_url":"https://academic.oup.com/dsh/article-pdf/32/suppl_1/i92/17751451/fqw049.pdf","source":{"id":"https://openalex.org/S2734814886","display_name":"Digital Scholarship in the Humanities","issn_l":"2055-7671","issn":["2055-7671","2055-768X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311648","https://openalex.org/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Digital Scholarship in the Humanities","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.5600000023841858,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320322892","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2553684291.pdf","grobid_xml":"https://content.openalex.org/works/W2553684291.grobid-xml"},"referenced_works_count":30,"referenced_works":["https://openalex.org/W87518688","https://openalex.org/W95844184","https://openalex.org/W156229664","https://openalex.org/W291995643","https://openalex.org/W332537236","https://openalex.org/W594320116","https://openalex.org/W1552062513","https://openalex.org/W1682245969","https://openalex.org/W1695714277","https://openalex.org/W1836961878","https://openalex.org/W2078343981","https://openalex.org/W2127011888","https://openalex.org/W2160637503","https://openalex.org/W2163942301","https://openalex.org/W2245562056","https://openalex.org/W2250491265","https://openalex.org/W2293937123","https://openalex.org/W2485371128","https://openalex.org/W2542760601","https://openalex.org/W2584031441","https://openalex.org/W2595632333","https://openalex.org/W2625465398","https://openalex.org/W2894747428","https://openalex.org/W2896851686","https://openalex.org/W3135790904","https://openalex.org/W4238989478","https://openalex.org/W4251738133","https://openalex.org/W4290647079","https://openalex.org/W6678611536","https://openalex.org/W6690317807"],"related_works":["https://openalex.org/W2359384769","https://openalex.org/W2136286070","https://openalex.org/W4236637176","https://openalex.org/W1969215655","https://openalex.org/W2101538619","https://openalex.org/W1521096565","https://openalex.org/W2156828418","https://openalex.org/W1583359735","https://openalex.org/W901044134","https://openalex.org/W1990772768"],"abstract_inverted_index":{"In":[0,195],"2011,":[1],"six":[2],"academics":[3],"gathered":[4],"over":[5],"90,000":[6],"authentic":[7,79],"text":[8,80,138],"messages":[9,81],"(SMS)":[10],"in":[11,17,82,191,224],"French":[12,20,123,137],"from":[13,157],"the":[14,86,140,143,146,158,184,199,203,207,225,229],"general":[15],"public,":[16],"compliance":[18],"with":[19],"law":[21],"(http://sud4science.org,Panckhurst":[22],"et":[23,61,110,114,164,189],"al.,":[24,62,111,115,165,190],"2013).":[25,116],"The":[26,47,130,167],"SMS":[27,88],"\u2018donors\u2019":[28],"were":[29,107,119],"also":[30,171],"invited":[31],"to":[32,72,155,217],"fill":[33],"out":[34],"a":[35,53,74,91,102,176],"sociolinguistic":[36,147],"questionnaire":[37,148],"(see":[38],"Figure":[39],"A1,":[40],"Mo\u00efse,":[41,45],"2013,":[42],"Panckhurst":[43,163,188],"and":[44,65,77,96,101,126,145,205,221],"2014).":[46,166],"\u2018sud4science\u2019":[48,87],"project":[49,204],"is":[50],"part":[51],"of":[52,94,134,228],"vast":[54],"international":[55],"initiative,":[56],"entitled":[57],"\u2018sms4science\u2019":[58],"(http://www.sms4science.org/,":[59],"Fairon":[60],"2006,":[63],"Cougnon":[64],"Fairon,":[66],"2014,":[67,112],"Cougnon,":[68],"2015),":[69],"which":[70],"aims":[71],"build":[73],"worldwide":[75],"database":[76],"analyse":[78],"different":[83],"languages.":[84],"After":[85],"data":[89,149],"collection,":[90],"pre-processing":[92],"phase":[93,106],"checking":[95],"eliminating":[97],"any":[98],"spurious":[99],"information":[100],"three-step":[103],"semi-automatic":[104],"anonymization":[105,208],"conducted":[108],"(Accorsi":[109],"Patel":[113],"Two":[117],"extracts":[118,144],"transcoded":[120],"into":[121],"standardized":[122],"(1,000":[124],"SMS)":[125],"annotated":[127],"(100":[128],"SMS).":[129],"finalized":[131],"digital":[132],"resource":[133],"88,000":[135],"anonymized":[136],"messages,":[139],"\u201888milSMS\u2019":[141],"corpus,":[142],"are":[150],"currently":[151],"available":[152,174],"for":[153],"all":[154],"download,":[156],"Huma-Num":[159],"web":[160],"service":[161],"(http://88milsms.huma-num.fr,":[162],"88milSMS":[168],"corpus":[169],"has":[170],"recently":[172],"become":[173],"via":[175],"Creative":[177],"Commons":[178],"Attribution":[179],"4.0":[180],"International":[181],"licence":[182],"on":[183,213],"\u2018Ortolang\u2019":[185],"platform":[186],"(https://hdl.handle.net/11403/comere/cmr-88milsms/cmr-88milsms-tei-v1,":[187],"Chanier":[192],"(ed),":[193],"2016).":[194],"this":[196],"paper,":[197],"first":[198,226],"authors":[200],"briefly":[201],"situate":[202],"describe":[206],"process.":[209],"Then,":[210],"they":[211,215],"focus":[212],"why":[214],"decided":[216],"exclude":[218],"full":[219],"\u2018transcoding\u2019":[220],"linguistic":[222],"annotation":[223],"version":[227],"final":[230],"corpus.":[231]},"counts_by_year":[{"year":2024,"cited_by_count":38},{"year":2020,"cited_by_count":1}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
