{"id":"https://openalex.org/W4297841657","doi":"https://doi.org/10.21437/interspeech.2022-11281","title":"Transducer-based language embedding for spoken language identification","display_name":"Transducer-based language embedding for spoken language identification","publication_year":2022,"publication_date":"2022-09-16","ids":{"openalex":"https://openalex.org/W4297841657","doi":"https://doi.org/10.21437/interspeech.2022-11281"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2022-11281","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2022-11281","pdf_url":null,"source":{"id":"https://openalex.org/S4363604309","display_name":"Interspeech 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2022","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101966522","display_name":"Peng Shen","orcid":"https://orcid.org/0000-0002-0176-127X"},"institutions":[{"id":"https://openalex.org/I90023481","display_name":"National Institute of Information and Communications Technology","ror":"https://ror.org/016bgq349","country_code":"JP","type":"facility","lineage":["https://openalex.org/I90023481"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Peng Shen","raw_affiliation_strings":["National Institute of Information and Communications Technology (NICT"],"affiliations":[{"raw_affiliation_string":"National Institute of Information and Communications Technology (NICT","institution_ids":["https://openalex.org/I90023481"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034792613","display_name":"Xugang Lu","orcid":"https://orcid.org/0000-0001-7075-448X"},"institutions":[{"id":"https://openalex.org/I90023481","display_name":"National Institute of Information and Communications Technology","ror":"https://ror.org/016bgq349","country_code":"JP","type":"facility","lineage":["https://openalex.org/I90023481"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Xugang Lu","raw_affiliation_strings":["National Institute of Information and Communications Technology (NICT"],"affiliations":[{"raw_affiliation_string":"National Institute of Information and Communications Technology (NICT","institution_ids":["https://openalex.org/I90023481"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5114514387","display_name":"Hisashi Kawai","orcid":"https://orcid.org/0000-0002-0914-5092"},"institutions":[{"id":"https://openalex.org/I90023481","display_name":"National Institute of Information and Communications Technology","ror":"https://ror.org/016bgq349","country_code":"JP","type":"facility","lineage":["https://openalex.org/I90023481"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Hisashi Kawai","raw_affiliation_strings":["National Institute of Information and Communications Technology (NICT"],"affiliations":[{"raw_affiliation_string":"National Institute of Information and Communications Technology (NICT","institution_ids":["https://openalex.org/I90023481"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5101966522"],"corresponding_institution_ids":["https://openalex.org/I90023481"],"apc_list":null,"apc_paid":null,"fwci":0.7367,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.71737517,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"3724","last_page":"3728"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9861999750137329,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9861999750137329,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7526181936264038},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.6589533090591431},{"id":"https://openalex.org/keywords/spoken-language","display_name":"Spoken language","score":0.5686752200126648},{"id":"https://openalex.org/keywords/language-identification","display_name":"Language identification","score":0.5657863616943359},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.5319393277168274},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.49534860253334045},{"id":"https://openalex.org/keywords/transducer","display_name":"Transducer","score":0.47499552369117737},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.43590739369392395},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.39692509174346924},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.3405494689941406},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.16588598489761353},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.0802922248840332}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7526181936264038},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.6589533090591431},{"id":"https://openalex.org/C2776230583","wikidata":"https://www.wikidata.org/wiki/Q1322198","display_name":"Spoken language","level":2,"score":0.5686752200126648},{"id":"https://openalex.org/C129792486","wikidata":"https://www.wikidata.org/wiki/Q1050419","display_name":"Language identification","level":3,"score":0.5657863616943359},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.5319393277168274},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.49534860253334045},{"id":"https://openalex.org/C56318395","wikidata":"https://www.wikidata.org/wiki/Q215928","display_name":"Transducer","level":2,"score":0.47499552369117737},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.43590739369392395},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.39692509174346924},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.3405494689941406},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.16588598489761353},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0802922248840332},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2022-11281","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2022-11281","pdf_url":null,"source":{"id":"https://openalex.org/S4363604309","display_name":"Interspeech 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2022","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.4099999964237213,"id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G1069223013","display_name":null,"funder_award_id":"JSPS KAKENHI","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G1933719371","display_name":"Construction of a computational model to deal with the cocktail-party problem for intelligent speech interface","funder_award_id":"19K12035","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G4636223006","display_name":null,"funder_award_id":"JSPS KAK","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G6208493299","display_name":"\u5730\u4e3b\u5236\u5c55\u958b\u904e\u7a0b\u306e\u7814\u7a76","funder_award_id":"12035","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G914184973","display_name":"Self-supervised graph-based representation for language and speaker detection","funder_award_id":"21K17776","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"}],"funders":[{"id":"https://openalex.org/F4320334764","display_name":"Japan Society for the Promotion of Science","ror":"https://ror.org/00hhkn466"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W2078169166","https://openalex.org/W2104457544","https://openalex.org/W2371039490","https://openalex.org/W2406982333","https://openalex.org/W2408175559","https://openalex.org/W2515090196","https://openalex.org/W2612434969","https://openalex.org/W2747270953","https://openalex.org/W2748488820","https://openalex.org/W2807627734","https://openalex.org/W2889341949","https://openalex.org/W2890964092","https://openalex.org/W2936774411","https://openalex.org/W2938358845","https://openalex.org/W2962893195","https://openalex.org/W2963263347","https://openalex.org/W2972645685","https://openalex.org/W2974231335","https://openalex.org/W3010814938","https://openalex.org/W3024869864","https://openalex.org/W3095410713","https://openalex.org/W3097777922","https://openalex.org/W3137976833","https://openalex.org/W3139878283","https://openalex.org/W3163421828","https://openalex.org/W3196487394","https://openalex.org/W3197530164","https://openalex.org/W3198614662","https://openalex.org/W4287553982","https://openalex.org/W4288072840","https://openalex.org/W4297727296"],"related_works":["https://openalex.org/W3088333221","https://openalex.org/W4300928910","https://openalex.org/W2578342220","https://openalex.org/W2583219169","https://openalex.org/W1986021162","https://openalex.org/W3045567129","https://openalex.org/W2060410964","https://openalex.org/W3134336087","https://openalex.org/W92576643","https://openalex.org/W2761417937"],"abstract_inverted_index":{"The":[0],"acoustic":[1,20,72],"and":[2,74,89,108,116],"linguistic":[3,28,62,76],"features":[4,21,73,77],"are":[5],"important":[6],"cues":[7],"for":[8,41,78],"the":[9,24,56,59,65,85,94,99],"spoken":[10],"language":[11,38,52],"identification":[12],"(LID)":[13],"task.Recent":[14],"advanced":[15],"LID":[16,42,79,102],"systems":[17],"mainly":[18],"use":[19],"that":[22],"lack":[23],"usage":[25],"of":[26,58],"explicit":[27,75],"feature":[29],"encoding.In":[30],"this":[31],"paper,":[32],"we":[33],"propose":[34],"a":[35,51],"novel":[36],"transducer-based":[37],"embedding":[39,53],"approach":[40],"tasks":[43,103],"by":[44],"integrating":[45],"an":[46],"RNN":[47,60],"transducer":[48],"model":[49],"into":[50],"framework.Benefiting":[54],"from":[55],"advantages":[57],"transducer's":[61],"representation":[63],"capability,":[64],"proposed":[66,95],"method":[67,96],"can":[68],"exploit":[69],"both":[70],"phonetically-aware":[71],"tasks.Experiments":[80],"were":[81],"carried":[82],"out":[83],"on":[84,101,114],"large-scale":[86],"multilingual":[87],"LibriSpeech":[88],"VoxLingua107":[90],"datasets.Experimental":[91],"results":[92],"showed":[93],"significantly":[97],"improves":[98],"performance":[100],"with":[104],"12%":[105],"to":[106,110],"59%":[107],"16%":[109],"24%":[111],"relative":[112],"improvement":[113],"in-domain":[115],"cross-domain":[117],"datasets,":[118],"respectively.":[119]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":1}],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-10T00:00:00"}
