{"id":"https://openalex.org/W4307348164","doi":"https://doi.org/10.1007/s10579-022-09624-1","title":"Register identification from the unrestricted open Web using the Corpus of Online Registers of English","display_name":"Register identification from the unrestricted open Web using the Corpus of Online Registers of English","publication_year":2022,"publication_date":"2022-10-26","ids":{"openalex":"https://openalex.org/W4307348164","doi":"https://doi.org/10.1007/s10579-022-09624-1"},"language":"en","primary_location":{"id":"doi:10.1007/s10579-022-09624-1","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10579-022-09624-1","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10579-022-09624-1.pdf","source":{"id":"https://openalex.org/S4306424877","display_name":"Language Resources and Evaluation","issn_l":"1574-020X","issn":["1574-020X","1574-0218"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Language Resources and Evaluation","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/s10579-022-09624-1.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5036607482","display_name":"Veronika Laippala","orcid":"https://orcid.org/0000-0002-7635-429X"},"institutions":[{"id":"https://openalex.org/I155660961","display_name":"University of Turku","ror":"https://ror.org/05vghhr25","country_code":"FI","type":"education","lineage":["https://openalex.org/I155660961"]}],"countries":["FI"],"is_corresponding":true,"raw_author_name":"Veronika Laippala","raw_affiliation_strings":["University of Turku, Turku, Finland"],"affiliations":[{"raw_affiliation_string":"University of Turku, Turku, Finland","institution_ids":["https://openalex.org/I155660961"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051842492","display_name":"Samuel R\u00f6nnqvist","orcid":null},"institutions":[{"id":"https://openalex.org/I155660961","display_name":"University of Turku","ror":"https://ror.org/05vghhr25","country_code":"FI","type":"education","lineage":["https://openalex.org/I155660961"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Samuel R\u00f6nnqvist","raw_affiliation_strings":["University of Turku, Turku, Finland"],"affiliations":[{"raw_affiliation_string":"University of Turku, Turku, Finland","institution_ids":["https://openalex.org/I155660961"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055050922","display_name":"Miika Oinonen","orcid":null},"institutions":[{"id":"https://openalex.org/I155660961","display_name":"University of Turku","ror":"https://ror.org/05vghhr25","country_code":"FI","type":"education","lineage":["https://openalex.org/I155660961"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Miika Oinonen","raw_affiliation_strings":["University of Turku, Turku, Finland"],"affiliations":[{"raw_affiliation_string":"University of Turku, Turku, Finland","institution_ids":["https://openalex.org/I155660961"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077957546","display_name":"Aki-Juhani Kyr\u00f6l\u00e4inen","orcid":"https://orcid.org/0000-0001-8874-9005"},"institutions":[{"id":"https://openalex.org/I155660961","display_name":"University of Turku","ror":"https://ror.org/05vghhr25","country_code":"FI","type":"education","lineage":["https://openalex.org/I155660961"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Aki-Juhani Kyr\u00f6l\u00e4inen","raw_affiliation_strings":["University of Turku, Turku, Finland"],"affiliations":[{"raw_affiliation_string":"University of Turku, Turku, Finland","institution_ids":["https://openalex.org/I155660961"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112631398","display_name":"Anna Salmela","orcid":null},"institutions":[{"id":"https://openalex.org/I155660961","display_name":"University of Turku","ror":"https://ror.org/05vghhr25","country_code":"FI","type":"education","lineage":["https://openalex.org/I155660961"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Anna Salmela","raw_affiliation_strings":["University of Turku, Turku, Finland"],"affiliations":[{"raw_affiliation_string":"University of Turku, Turku, Finland","institution_ids":["https://openalex.org/I155660961"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061846947","display_name":"Douglas Biber","orcid":"https://orcid.org/0000-0002-7024-505X"},"institutions":[{"id":"https://openalex.org/I203172682","display_name":"Northern Arizona University","ror":"https://ror.org/0272j5188","country_code":"US","type":"education","lineage":["https://openalex.org/I203172682"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Douglas Biber","raw_affiliation_strings":["Northern Arizona University, Flagstaff, AZ, USA"],"affiliations":[{"raw_affiliation_string":"Northern Arizona University, Flagstaff, AZ, USA","institution_ids":["https://openalex.org/I203172682"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033473979","display_name":"Jesse Egbert","orcid":"https://orcid.org/0000-0002-3751-2865"},"institutions":[{"id":"https://openalex.org/I203172682","display_name":"Northern Arizona University","ror":"https://ror.org/0272j5188","country_code":"US","type":"education","lineage":["https://openalex.org/I203172682"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jesse Egbert","raw_affiliation_strings":["Northern Arizona University, Flagstaff, AZ, USA"],"affiliations":[{"raw_affiliation_string":"Northern Arizona University, Flagstaff, AZ, USA","institution_ids":["https://openalex.org/I203172682"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5066925770","display_name":"Sampo Pyysalo","orcid":"https://orcid.org/0000-0002-6279-5000"},"institutions":[{"id":"https://openalex.org/I155660961","display_name":"University of Turku","ror":"https://ror.org/05vghhr25","country_code":"FI","type":"education","lineage":["https://openalex.org/I155660961"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Sampo Pyysalo","raw_affiliation_strings":["University of Turku, Turku, Finland"],"affiliations":[{"raw_affiliation_string":"University of Turku, Turku, Finland","institution_ids":["https://openalex.org/I155660961"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5036607482"],"corresponding_institution_ids":["https://openalex.org/I155660961"],"apc_list":null,"apc_paid":null,"fwci":0.9724,"has_fulltext":true,"cited_by_count":7,"citation_normalized_percentile":{"value":0.80027431,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":98},"biblio":{"volume":"57","issue":"3","first_page":"1045","last_page":"1079"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8255547285079956},{"id":"https://openalex.org/keywords/register","display_name":"Register (sociolinguistics)","score":0.7464492321014404},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.7325030565261841},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5637990236282349},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5397549271583557},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.509835422039032},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.49544084072113037},{"id":"https://openalex.org/keywords/variation","display_name":"Variation (astronomy)","score":0.44354677200317383},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.4264428913593292},{"id":"https://openalex.org/keywords/web-application","display_name":"Web application","score":0.4202384948730469},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.1317780315876007}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8255547285079956},{"id":"https://openalex.org/C2779235478","wikidata":"https://www.wikidata.org/wiki/Q286576","display_name":"Register (sociolinguistics)","level":2,"score":0.7464492321014404},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.7325030565261841},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5637990236282349},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5397549271583557},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.509835422039032},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.49544084072113037},{"id":"https://openalex.org/C2778334786","wikidata":"https://www.wikidata.org/wiki/Q1586270","display_name":"Variation (astronomy)","level":2,"score":0.44354677200317383},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.4264428913593292},{"id":"https://openalex.org/C118643609","wikidata":"https://www.wikidata.org/wiki/Q189210","display_name":"Web application","level":2,"score":0.4202384948730469},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.1317780315876007},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C44870925","wikidata":"https://www.wikidata.org/wiki/Q37547","display_name":"Astrophysics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1007/s10579-022-09624-1","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10579-022-09624-1","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10579-022-09624-1.pdf","source":{"id":"https://openalex.org/S4306424877","display_name":"Language Resources and Evaluation","issn_l":"1574-020X","issn":["1574-020X","1574-0218"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Language Resources and Evaluation","raw_type":"journal-article"},{"id":"pmh:oai:www.utupub.fi:10024/187005","is_oa":true,"landing_page_url":"https://link.springer.com/article/10.1007/s10579-022-09624-1","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":null}],"best_oa_location":{"id":"doi:10.1007/s10579-022-09624-1","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10579-022-09624-1","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10579-022-09624-1.pdf","source":{"id":"https://openalex.org/S4306424877","display_name":"Language Resources and Evaluation","issn_l":"1574-020X","issn":["1574-020X","1574-0218"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Language Resources and Evaluation","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.46000000834465027,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G7021800142","display_name":null,"funder_award_id":"1147581","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G848032724","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G895001607","display_name":null,"funder_award_id":"Grant","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320322861","display_name":"Emil Aaltosen S\u00e4\u00e4ti\u00f6","ror":"https://ror.org/005rt3g54"},{"id":"https://openalex.org/F4320323313","display_name":"Turun Yliopisto","ror":"https://ror.org/05vghhr25"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4307348164.pdf","grobid_xml":"https://content.openalex.org/works/W4307348164.grobid-xml"},"referenced_works_count":75,"referenced_works":["https://openalex.org/W40340171","https://openalex.org/W64523720","https://openalex.org/W140890161","https://openalex.org/W229443363","https://openalex.org/W381317276","https://openalex.org/W411382560","https://openalex.org/W618142552","https://openalex.org/W1607545660","https://openalex.org/W1614298861","https://openalex.org/W1832693441","https://openalex.org/W1913261780","https://openalex.org/W1983578042","https://openalex.org/W1997083791","https://openalex.org/W2009727972","https://openalex.org/W2014313181","https://openalex.org/W2037193028","https://openalex.org/W2045455073","https://openalex.org/W2080018251","https://openalex.org/W2093585241","https://openalex.org/W2133564696","https://openalex.org/W2134943861","https://openalex.org/W2151560261","https://openalex.org/W2155870214","https://openalex.org/W2156354361","https://openalex.org/W2158139315","https://openalex.org/W2161411600","https://openalex.org/W2170240176","https://openalex.org/W2170571488","https://openalex.org/W2228424703","https://openalex.org/W2250539671","https://openalex.org/W2250998503","https://openalex.org/W2251771443","https://openalex.org/W2251787445","https://openalex.org/W2251803266","https://openalex.org/W2321170399","https://openalex.org/W2321621029","https://openalex.org/W2469520358","https://openalex.org/W2493916176","https://openalex.org/W2579928837","https://openalex.org/W2795582519","https://openalex.org/W2811332705","https://openalex.org/W2871167180","https://openalex.org/W2889478606","https://openalex.org/W2889638513","https://openalex.org/W2935760417","https://openalex.org/W2948902769","https://openalex.org/W2953368978","https://openalex.org/W2959312668","https://openalex.org/W2962739339","https://openalex.org/W2963026768","https://openalex.org/W2963626623","https://openalex.org/W2978017171","https://openalex.org/W2982575279","https://openalex.org/W2989245864","https://openalex.org/W3003266841","https://openalex.org/W3006657556","https://openalex.org/W3034617741","https://openalex.org/W3035390927","https://openalex.org/W3101860695","https://openalex.org/W3122399969","https://openalex.org/W3133840184","https://openalex.org/W3153727209","https://openalex.org/W3174827879","https://openalex.org/W4205167092","https://openalex.org/W4237534430","https://openalex.org/W4242390622","https://openalex.org/W4246413330","https://openalex.org/W6601610220","https://openalex.org/W6678277124","https://openalex.org/W6690170925","https://openalex.org/W6739901393","https://openalex.org/W6752788575","https://openalex.org/W6769318315","https://openalex.org/W6769430610","https://openalex.org/W7000635770"],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W2050193698","https://openalex.org/W76669076","https://openalex.org/W1982697162","https://openalex.org/W2894950577","https://openalex.org/W4306677187","https://openalex.org/W3090759139","https://openalex.org/W2549642946","https://openalex.org/W2952389593","https://openalex.org/W4292713380"],"abstract_inverted_index":{"Abstract":[0],"This":[1],"article":[2],"examines":[3],"the":[4,45,58,64,78,83,95,102,114,125,134,153,162,177,183,186,193,200,208,219],"automatic":[5,209],"identification":[6,52,210],"of":[7,41,60,73,82,85,88,105,131,185,211,215],"Web":[8,65,98,108,216],"registers,":[9],"that":[10,113,159,192],"is,":[11,160],"text":[12],"varieties":[13],"such":[14],"as":[15],"news":[16],"articles":[17],"and":[18],"reviews.":[19],"Most":[20],"studies":[21],"have":[22],"focused":[23],"on":[24,44,63],"corpora":[25,36],"restricted":[26],"to":[27,68],"include":[28],"only":[29,38],"preselected":[30],"classes":[31],"with":[32,121,124,133,167],"well-defined":[33],"characteristics.":[34],"These":[35],"feature":[37],"a":[39,173,204,212],"subset":[40],"documents":[42],"found":[43],"unrestricted":[46,96,220],"open":[47,80,97],"Web,":[48,221],"for":[49,207,226],"which":[50,91],"register":[51,157,164],"has":[53],"been":[54],"particularly":[55],"difficult":[56],"because":[57],"range":[59],"linguistic":[61],"variation":[62],"is":[66,92,101],"known":[67],"be":[69,118],"substantial.":[70],"As":[71],"part":[72],"this":[74],"study,":[75],"we":[76,111,180],"present":[77],"first":[79,149],"release":[81],"Corpus":[84],"Online":[86],"Registers":[87],"English":[89],"(CORE),":[90],"drawn":[93],"from":[94,218],"and,":[99],"currently,":[100],"largest":[103],"collection":[104],"manually":[106],"annotated":[107],"registers.":[109],"Furthermore,":[110],"demonstrate":[112],"CORE":[115],"registers":[116,154,217],"can":[117],"automatically":[119],"identified":[120],"competitive":[122],"results,":[123],"best":[126,140],"performance":[127,141],"being":[128],"an":[129],"F1-score":[130],"68%":[132],"deep":[135],"learning":[136],"model":[137,189],"BERT.":[138],"The":[139,148],"was":[142],"achieved":[143],"using":[144,155],"two":[145],"modeling":[146,152],"strategies.":[147],"one":[150],"involved":[151],"propagated":[156],"labels,":[158],"repeating":[161],"main":[163],"label":[165,171],"along":[166],"its":[168],"corresponding":[169],"subregister":[170],"in":[172],"multilabel":[174],"model.":[175],"In":[176],"second":[178],"one,":[179],"explored":[181],"how":[182],"length":[184],"document":[187],"affects":[188],"performance,":[190],"discovering":[191],"beginning":[194],"provided":[195],"superior":[196],"classification":[197],"accuracy.":[198],"Overall,":[199],"current":[201],"study":[202],"presents":[203],"systematic":[205],"approach":[206],"large":[213],"number":[214],"hence":[222],"providing":[223],"new":[224],"pathways":[225],"future":[227],"studies.":[228]},"counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":5}],"updated_date":"2026-04-03T22:45:19.894376","created_date":"2025-10-10T00:00:00"}
