{"id":"https://openalex.org/W7130327381","doi":"https://doi.org/10.1109/o-cocosda68185.2025.11385259","title":"Exploring Dialects with Speech Embeddings: Insights from Two Speech Databases in Assamese and Finnish","display_name":"Exploring Dialects with Speech Embeddings: Insights from Two Speech Databases in Assamese and Finnish","publication_year":2025,"publication_date":"2025-11-12","ids":{"openalex":"https://openalex.org/W7130327381","doi":"https://doi.org/10.1109/o-cocosda68185.2025.11385259"},"language":null,"primary_location":{"id":"doi:10.1109/o-cocosda68185.2025.11385259","is_oa":false,"landing_page_url":"https://doi.org/10.1109/o-cocosda68185.2025.11385259","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 28th Conference of the Oriental COCOSDA International Committee for the Co-ordination and Standardisation of Speech Databases and Assessment Techniques (O-COCOSDA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5099721624","display_name":"Tuukka T\u00f6r\u00f6","orcid":null},"institutions":[{"id":"https://openalex.org/I4210110242","display_name":"Digital Science (United States)","ror":"https://ror.org/020h4b682","country_code":"US","type":"company","lineage":["https://openalex.org/I4210110242","https://openalex.org/I4210112888","https://openalex.org/I4210118830"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tuukka T\u00f6r\u00f6","raw_affiliation_strings":["University of Helsinki,Department of Digital Humanities,Finland"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Helsinki,Department of Digital Humanities,Finland","institution_ids":["https://openalex.org/I4210110242"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009644866","display_name":"Antti Suni","orcid":"https://orcid.org/0000-0003-3414-6035"},"institutions":[{"id":"https://openalex.org/I4210110242","display_name":"Digital Science (United States)","ror":"https://ror.org/020h4b682","country_code":"US","type":"company","lineage":["https://openalex.org/I4210110242","https://openalex.org/I4210112888","https://openalex.org/I4210118830"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Antti Suni","raw_affiliation_strings":["University of Helsinki,Department of Digital Humanities,Finland"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Helsinki,Department of Digital Humanities,Finland","institution_ids":["https://openalex.org/I4210110242"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035443245","display_name":"Leena Dihingia","orcid":"https://orcid.org/0000-0002-1210-8524"},"institutions":[{"id":"https://openalex.org/I138537684","display_name":"Gauhati University","ror":"https://ror.org/01ppj9r51","country_code":"IN","type":"education","lineage":["https://openalex.org/I138537684"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Leena Dihingia","raw_affiliation_strings":["Gauhati University,Department of Assamese,India"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Gauhati University,Department of Assamese,India","institution_ids":["https://openalex.org/I138537684"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5099721625","display_name":"Juraj \u0160imko","orcid":null},"institutions":[{"id":"https://openalex.org/I4210110242","display_name":"Digital Science (United States)","ror":"https://ror.org/020h4b682","country_code":"US","type":"company","lineage":["https://openalex.org/I4210110242","https://openalex.org/I4210112888","https://openalex.org/I4210118830"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Juraj \u0160imko","raw_affiliation_strings":["University of Helsinki,Department of Digital Humanities,Finland"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Helsinki,Department of Digital Humanities,Finland","institution_ids":["https://openalex.org/I4210110242"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5126281773","display_name":"Priyankoo Sarmah","orcid":null},"institutions":[{"id":"https://openalex.org/I1317621060","display_name":"Indian Institute of Technology Guwahati","ror":"https://ror.org/0022nd079","country_code":"IN","type":"education","lineage":["https://openalex.org/I1317621060"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Priyankoo Sarmah","raw_affiliation_strings":["Indian Institute of Technology,Center for Linguistic Science and Technology,Guwahati,India"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Indian Institute of Technology,Center for Linguistic Science and Technology,Guwahati,India","institution_ids":["https://openalex.org/I1317621060"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.81593841,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.3497999906539917,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.3497999906539917,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11640","display_name":"Linguistic Variation and Morphology","score":0.22859999537467957,"subfield":{"id":"https://openalex.org/subfields/3310","display_name":"Linguistics and Language"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10403","display_name":"Phonetics and Phonology Research","score":0.11999999731779099,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/assamese","display_name":"Assamese","score":0.7466999888420105},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.5134999752044678},{"id":"https://openalex.org/keywords/linear-discriminant-analysis","display_name":"Linear discriminant analysis","score":0.4916999936103821},{"id":"https://openalex.org/keywords/variation","display_name":"Variation (astronomy)","score":0.4781000018119812},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.4562999904155731},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.37689998745918274},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.3635999858379364}],"concepts":[{"id":"https://openalex.org/C2777834912","wikidata":"https://www.wikidata.org/wiki/Q29401","display_name":"Assamese","level":2,"score":0.7466999888420105},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5967000126838684},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5453000068664551},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.5134999752044678},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.49939998984336853},{"id":"https://openalex.org/C69738355","wikidata":"https://www.wikidata.org/wiki/Q1228929","display_name":"Linear discriminant analysis","level":2,"score":0.4916999936103821},{"id":"https://openalex.org/C2778334786","wikidata":"https://www.wikidata.org/wiki/Q1586270","display_name":"Variation (astronomy)","level":2,"score":0.4781000018119812},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.4562999904155731},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.38600000739097595},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.37689998745918274},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.3635999858379364},{"id":"https://openalex.org/C2777853878","wikidata":"https://www.wikidata.org/wiki/Q743569","display_name":"Phonetic transcription","level":2,"score":0.33730000257492065},{"id":"https://openalex.org/C155092808","wikidata":"https://www.wikidata.org/wiki/Q182557","display_name":"Computational linguistics","level":2,"score":0.30059999227523804},{"id":"https://openalex.org/C2780844864","wikidata":"https://www.wikidata.org/wiki/Q184377","display_name":"Pronunciation","level":2,"score":0.2915000021457672},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.290800005197525},{"id":"https://openalex.org/C91863865","wikidata":"https://www.wikidata.org/wiki/Q4349497","display_name":"Speech corpus","level":3,"score":0.28940001130104065},{"id":"https://openalex.org/C78397625","wikidata":"https://www.wikidata.org/wiki/Q192487","display_name":"Discriminant","level":2,"score":0.2865000069141388},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.26739999651908875},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.2578999996185303},{"id":"https://openalex.org/C2780148112","wikidata":"https://www.wikidata.org/wiki/Q1432581","display_name":"Proxy (statistics)","level":2,"score":0.2529999911785126}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/o-cocosda68185.2025.11385259","is_oa":false,"landing_page_url":"https://doi.org/10.1109/o-cocosda68185.2025.11385259","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 28th Conference of the Oriental COCOSDA International Committee for the Co-ordination and Standardisation of Speech Databases and Assessment Techniques (O-COCOSDA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Reduced inequalities","score":0.7608773112297058,"id":"https://metadata.un.org/sdg/10"}],"awards":[{"id":"https://openalex.org/G6799201797","display_name":"Predictive Processing Approach to Modelling Prosodic Hierarchy for Speech Synthesis","funder_award_id":"357262","funder_id":"https://openalex.org/F4320321108","funder_display_name":"Academy of Finland"},{"id":"https://openalex.org/G6918193180","display_name":null,"funder_award_id":"357262","funder_id":"https://openalex.org/F4320321108","funder_display_name":"Academy of Finland"}],"funders":[{"id":"https://openalex.org/F4320321108","display_name":"Academy of Finland","ror":"https://ror.org/05k73zm37"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":12,"referenced_works":["https://openalex.org/W1908991523","https://openalex.org/W2101234009","https://openalex.org/W2106437550","https://openalex.org/W3139878283","https://openalex.org/W3213029956","https://openalex.org/W4230167808","https://openalex.org/W4230584948","https://openalex.org/W4254314071","https://openalex.org/W4283067018","https://openalex.org/W4292457883","https://openalex.org/W4400165185","https://openalex.org/W4413571888"],"related_works":[],"abstract_inverted_index":{"This":[0],"paper":[1],"presents":[2],"analyses":[3],"of":[4,68,84,94,113],"dialectal":[5,69,118],"variation":[6],"in":[7,36,111],"Assamese":[8],"and":[9,39,44,56,108],"Finnish":[10],"using":[11,87],"utterance-level":[12],"embeddings":[13],"extracted":[14],"from":[15],"a":[16,64,92],"self-supervised":[17],"speech":[18,32],"representation":[19],"model":[20],"fine-tuned":[21],"for":[22,78,101],"language":[23],"identification":[24],"(LID).":[25],"The":[26],"languages":[27],"are":[28],"represented":[29],"by":[30],"two":[31,51],"corpora":[33],"substantially":[34],"differing":[35],"their":[37],"design":[38],"composition.":[40],"Rather":[41],"than":[42],"extracting":[43],"analyzing":[45],"specific":[46],"acoustic":[47],"features,":[48],"we":[49],"apply":[50],"linear":[52,57],"transformations-principal":[53],"components":[54],"analysis":[55],"discriminant":[58],"analysis-on":[59],"the":[60,72,82,88],"embedding":[61],"space,":[62],"enabling":[63],"relatively":[65],"theory-independent":[66],"investigation":[67],"relationships":[70],"without":[71],"need":[73],"to":[74],"define":[75],"cross-linguistic":[76],"features":[77],"comparison.":[79],"We":[80,98],"evaluate":[81],"effects":[83],"these":[85],"transformations":[86],"geographical":[89],"distances":[90],"as":[91],"proxy":[93],"relatedness":[95],"among":[96],"varieties.":[97],"show":[99],"that":[100],"both":[102],"languages,":[103],"our":[104],"method":[105],"yields":[106],"quantifiable":[107],"interpretable":[109],"results":[110],"terms":[112],"clustering":[114],"varieties":[115],"into":[116],"meaningful":[117],"groupings.":[119]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-02-19T00:00:00"}
