{"id":"https://openalex.org/W2924380321","doi":"https://doi.org/10.1017/s135132491900038x","title":"Language model adaptation for language and dialect identification of text","display_name":"Language model adaptation for language and dialect identification of text","publication_year":2019,"publication_date":"2019-07-31","ids":{"openalex":"https://openalex.org/W2924380321","doi":"https://doi.org/10.1017/s135132491900038x","mag":"2924380321"},"language":"en","primary_location":{"id":"doi:10.1017/s135132491900038x","is_oa":true,"landing_page_url":"https://doi.org/10.1017/s135132491900038x","pdf_url":"https://www.cambridge.org/core/services/aop-cambridge-core/content/view/71F31438DEC6AB1653D0E3FA36E5F7C7/S135132491900038Xa.pdf/div-class-title-language-model-adaptation-for-language-and-dialect-identification-of-text-div.pdf","source":{"id":"https://openalex.org/S18088403","display_name":"Natural Language Engineering","issn_l":"1351-3249","issn":["1351-3249","1469-8110"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311721","host_organization_name":"Cambridge University Press","host_organization_lineage":["https://openalex.org/P4310311721","https://openalex.org/P4310311702"],"host_organization_lineage_names":["Cambridge University Press","University of Cambridge"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Natural Language Engineering","raw_type":"journal-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://www.cambridge.org/core/services/aop-cambridge-core/content/view/71F31438DEC6AB1653D0E3FA36E5F7C7/S135132491900038Xa.pdf/div-class-title-language-model-adaptation-for-language-and-dialect-identification-of-text-div.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5065410247","display_name":"Tommi Jauhiainen","orcid":"https://orcid.org/0000-0002-6474-3570"},"institutions":[{"id":"https://openalex.org/I133731052","display_name":"University of Helsinki","ror":"https://ror.org/040af2s02","country_code":"FI","type":"education","lineage":["https://openalex.org/I133731052"]}],"countries":["FI"],"is_corresponding":true,"raw_author_name":"T. Jauhiainen","raw_affiliation_strings":["Department of Digital Humanities, University of Helsinki, Helsinki 00014, Finland"],"affiliations":[{"raw_affiliation_string":"Department of Digital Humanities, University of Helsinki, Helsinki 00014, Finland","institution_ids":["https://openalex.org/I133731052"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001408607","display_name":"Krister Lind\u00e9n","orcid":"https://orcid.org/0000-0003-2337-303X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"K. Lind\u00e9n","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5073038710","display_name":"Heidi Jauhiainen","orcid":"https://orcid.org/0000-0002-8227-5627"},"institutions":[{"id":"https://openalex.org/I133731052","display_name":"University of Helsinki","ror":"https://ror.org/040af2s02","country_code":"FI","type":"education","lineage":["https://openalex.org/I133731052"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"H. Jauhiainen","raw_affiliation_strings":["Department of Digital Humanities, University of Helsinki, Helsinki 00014, Finland"],"affiliations":[{"raw_affiliation_string":"Department of Digital Humanities, University of Helsinki, Helsinki 00014, Finland","institution_ids":["https://openalex.org/I133731052"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5065410247"],"corresponding_institution_ids":["https://openalex.org/I133731052"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.02359219,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"25","issue":"5","first_page":"561","last_page":"583"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9951000213623047,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9951000213623047,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.9890000224113464,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10759","display_name":"Translation Studies and Practices","score":0.97079998254776,"subfield":{"id":"https://openalex.org/subfields/1203","display_name":"Language and Linguistics"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.8513974547386169},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.810046374797821},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.7332910895347595},{"id":"https://openalex.org/keywords/language-identification","display_name":"Language identification","score":0.6887452602386475},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5763906240463257},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.5552955865859985},{"id":"https://openalex.org/keywords/german","display_name":"German","score":0.543487548828125},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4501882493495941},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.37133386731147766},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.25802919268608093},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.19227218627929688}],"concepts":[{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.8513974547386169},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.810046374797821},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.7332910895347595},{"id":"https://openalex.org/C129792486","wikidata":"https://www.wikidata.org/wiki/Q1050419","display_name":"Language identification","level":3,"score":0.6887452602386475},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5763906240463257},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.5552955865859985},{"id":"https://openalex.org/C154775046","wikidata":"https://www.wikidata.org/wiki/Q188","display_name":"German","level":2,"score":0.543487548828125},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4501882493495941},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.37133386731147766},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.25802919268608093},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.19227218627929688},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.1017/s135132491900038x","is_oa":true,"landing_page_url":"https://doi.org/10.1017/s135132491900038x","pdf_url":"https://www.cambridge.org/core/services/aop-cambridge-core/content/view/71F31438DEC6AB1653D0E3FA36E5F7C7/S135132491900038Xa.pdf/div-class-title-language-model-adaptation-for-language-and-dialect-identification-of-text-div.pdf","source":{"id":"https://openalex.org/S18088403","display_name":"Natural Language Engineering","issn_l":"1351-3249","issn":["1351-3249","1469-8110"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311721","host_organization_name":"Cambridge University Press","host_organization_lineage":["https://openalex.org/P4310311721","https://openalex.org/P4310311702"],"host_organization_lineage_names":["Cambridge University Press","University of Cambridge"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Natural Language Engineering","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:1903.10915","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1903.10915","pdf_url":"https://arxiv.org/pdf/1903.10915","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:2924380321","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/1903.10915.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"pmh:oai:helda.helsinki.fi:10138/305333","is_oa":true,"landing_page_url":"http://hdl.handle.net/10138/305333","pdf_url":null,"source":{"id":"https://openalex.org/S4210213322","display_name":"Ty\u00f6v\u00e4entutkimus Vuosikirja","issn_l":"0784-1272","issn":["0784-1272","1459-7780"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Article"},{"id":"doi:10.48550/arxiv.1903.10915","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1903.10915","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.1017/s135132491900038x","is_oa":true,"landing_page_url":"https://doi.org/10.1017/s135132491900038x","pdf_url":"https://www.cambridge.org/core/services/aop-cambridge-core/content/view/71F31438DEC6AB1653D0E3FA36E5F7C7/S135132491900038Xa.pdf/div-class-title-language-model-adaptation-for-language-and-dialect-identification-of-text-div.pdf","source":{"id":"https://openalex.org/S18088403","display_name":"Natural Language Engineering","issn_l":"1351-3249","issn":["1351-3249","1469-8110"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311721","host_organization_name":"Cambridge University Press","host_organization_lineage":["https://openalex.org/P4310311721","https://openalex.org/P4310311702"],"host_organization_lineage_names":["Cambridge University Press","University of Cambridge"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Natural Language Engineering","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.800000011920929}],"awards":[],"funders":[{"id":"https://openalex.org/F4320323433","display_name":"Koneen S\u00e4\u00e4ti\u00f6","ror":"https://ror.org/05jwty529"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2924380321.pdf","grobid_xml":"https://content.openalex.org/works/W2924380321.grobid-xml"},"referenced_works_count":71,"referenced_works":["https://openalex.org/W92274471","https://openalex.org/W244375653","https://openalex.org/W402546610","https://openalex.org/W565010989","https://openalex.org/W851785710","https://openalex.org/W1546539812","https://openalex.org/W1553301179","https://openalex.org/W1556664388","https://openalex.org/W1969913974","https://openalex.org/W1997241840","https://openalex.org/W2017237846","https://openalex.org/W2072223048","https://openalex.org/W2081748579","https://openalex.org/W2082439473","https://openalex.org/W2138309071","https://openalex.org/W2162019804","https://openalex.org/W2167308113","https://openalex.org/W2187238335","https://openalex.org/W2197073048","https://openalex.org/W2251301301","https://openalex.org/W2251862917","https://openalex.org/W2279695784","https://openalex.org/W2284764790","https://openalex.org/W2468355476","https://openalex.org/W2525877692","https://openalex.org/W2553512497","https://openalex.org/W2561747913","https://openalex.org/W2573442104","https://openalex.org/W2575880437","https://openalex.org/W2583190795","https://openalex.org/W2620806258","https://openalex.org/W2626499725","https://openalex.org/W2649800034","https://openalex.org/W2728897792","https://openalex.org/W2739520314","https://openalex.org/W2740338180","https://openalex.org/W2740358818","https://openalex.org/W2740917210","https://openalex.org/W2741135879","https://openalex.org/W2741270771","https://openalex.org/W2741365501","https://openalex.org/W2741798456","https://openalex.org/W2757188127","https://openalex.org/W2794998650","https://openalex.org/W2795267607","https://openalex.org/W2797514719","https://openalex.org/W2799012885","https://openalex.org/W2813705486","https://openalex.org/W2884629087","https://openalex.org/W2888160543","https://openalex.org/W2889057516","https://openalex.org/W2889205121","https://openalex.org/W2899166839","https://openalex.org/W2901457775","https://openalex.org/W2911964244","https://openalex.org/W2912514697","https://openalex.org/W2913128218","https://openalex.org/W2913168336","https://openalex.org/W2914072086","https://openalex.org/W2914704102","https://openalex.org/W2914798463","https://openalex.org/W2914870929","https://openalex.org/W2914943943","https://openalex.org/W2962758345","https://openalex.org/W2963302304","https://openalex.org/W2964068917","https://openalex.org/W6728886612","https://openalex.org/W6729896516","https://openalex.org/W7015702688","https://openalex.org/W7055868249","https://openalex.org/W7061722477"],"related_works":["https://openalex.org/W2342913127","https://openalex.org/W192243978","https://openalex.org/W2099765776","https://openalex.org/W2125106608","https://openalex.org/W261816329","https://openalex.org/W2789117037","https://openalex.org/W3101065397","https://openalex.org/W3162711154","https://openalex.org/W3040640815","https://openalex.org/W2101935221","https://openalex.org/W2181262297","https://openalex.org/W1941246423","https://openalex.org/W3006485553","https://openalex.org/W2251154944","https://openalex.org/W1986147089","https://openalex.org/W2161680891","https://openalex.org/W1576394385","https://openalex.org/W2548131017","https://openalex.org/W3093383192","https://openalex.org/W2926870023"],"abstract_inverted_index":{"Abstract":[0],"This":[1],"article":[2],"describes":[3],"an":[4,116],"unsupervised":[5,109],"language":[6,20,34,66,120],"model":[7],"(LM)":[8],"adaptation":[9,82,111],"approach":[10,24,79],"that":[11,108],"can":[12],"be":[13,113],"used":[14],"to":[15,27],"enhance":[16],"the":[17,32,45,57,60,71,88,96,102],"performance":[18],"of":[19,31,70],"identification":[21,35,63,67,121],"methods.":[22],"The":[23,51,77,105],"is":[25,38,54,130],"applied":[26],"a":[28],"current":[29],"version":[30],"HeLI":[33,41,46,90,92],"method,":[36],"which":[37,99],"now":[39],"called":[40],"2.0.":[42],"We":[43],"describe":[44],"2.0":[47,93],"method":[48],"in":[49,101,118,124],"detail.":[50],"resulting":[52],"system":[53],"evaluated":[55],"using":[56],"datasets":[58],"from":[59],"German":[61],"dialect":[62],"and":[64,75],"Indo-Aryan":[65],"shared":[68,103],"tasks":[69],"VarDial":[72],"workshops":[73],"2017":[74],"2018.":[76],"new":[78],"with":[80],"LM":[81,110],"provides":[83],"considerably":[84],"higher":[85],"F1-scores":[86],"than":[87],"basic":[89],"or":[91,95],"methods":[94],"other":[97],"systems":[98],"participated":[100],"tasks.":[104],"results":[106],"indicate":[107],"should":[112],"considered":[114],"as":[115],"option":[117],"all":[119],"tasks,":[122],"especially":[123],"those":[125],"where":[126],"encountering":[127],"out-of-domain":[128],"data":[129],"likely.":[131]},"counts_by_year":[],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
