{"id":"https://openalex.org/W3015586639","doi":"https://doi.org/10.1109/icassp40776.2020.9054573","title":"End-To-End Spoken Language Understanding Without Matched Language Speech Model Pretraining Data","display_name":"End-To-End Spoken Language Understanding Without Matched Language Speech Model Pretraining Data","publication_year":2020,"publication_date":"2020-04-09","ids":{"openalex":"https://openalex.org/W3015586639","doi":"https://doi.org/10.1109/icassp40776.2020.9054573","mag":"3015586639"},"language":"en","primary_location":{"id":"doi:10.1109/icassp40776.2020.9054573","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp40776.2020.9054573","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5028120017","display_name":"Ryan Price","orcid":"https://orcid.org/0000-0003-0624-6629"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Ryan Price","raw_affiliation_strings":["Interactions, LLC"],"affiliations":[{"raw_affiliation_string":"Interactions, LLC","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5028120017"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.9884,"has_fulltext":false,"cited_by_count":17,"citation_normalized_percentile":{"value":0.88945492,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"7979","last_page":"7983"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8044629096984863},{"id":"https://openalex.org/keywords/spoken-language","display_name":"Spoken language","score":0.6462103724479675},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5808044075965881},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.5751539468765259},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5251184105873108},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.45521044731140137},{"id":"https://openalex.org/keywords/end-to-end-principle","display_name":"End-to-end principle","score":0.4441068172454834},{"id":"https://openalex.org/keywords/language-identification","display_name":"Language identification","score":0.43349871039390564},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.41762006282806396},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.30370014905929565}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8044629096984863},{"id":"https://openalex.org/C2776230583","wikidata":"https://www.wikidata.org/wiki/Q1322198","display_name":"Spoken language","level":2,"score":0.6462103724479675},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5808044075965881},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.5751539468765259},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5251184105873108},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.45521044731140137},{"id":"https://openalex.org/C74296488","wikidata":"https://www.wikidata.org/wiki/Q2527392","display_name":"End-to-end principle","level":2,"score":0.4441068172454834},{"id":"https://openalex.org/C129792486","wikidata":"https://www.wikidata.org/wiki/Q1050419","display_name":"Language identification","level":3,"score":0.43349871039390564},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.41762006282806396},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.30370014905929565},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp40776.2020.9054573","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp40776.2020.9054573","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.6299999952316284}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W648947103","https://openalex.org/W1494198834","https://openalex.org/W1649407914","https://openalex.org/W2035438624","https://openalex.org/W2084339293","https://openalex.org/W2127141656","https://openalex.org/W2193413348","https://openalex.org/W2786839803","https://openalex.org/W2891229414","https://openalex.org/W2894164357","https://openalex.org/W2894835365","https://openalex.org/W2913960647","https://openalex.org/W2936774411","https://openalex.org/W2963288440","https://openalex.org/W2964108264","https://openalex.org/W2964309797","https://openalex.org/W2972314145","https://openalex.org/W2972347614","https://openalex.org/W2972525948","https://openalex.org/W2972584841","https://openalex.org/W3015797571","https://openalex.org/W6687566353"],"related_works":["https://openalex.org/W3107474891","https://openalex.org/W226586525","https://openalex.org/W124635070","https://openalex.org/W1986021162","https://openalex.org/W2060410964","https://openalex.org/W1563618553","https://openalex.org/W92576643","https://openalex.org/W3008083857","https://openalex.org/W1986147089","https://openalex.org/W3015586639"],"abstract_inverted_index":{"In":[0],"contrast":[1],"to":[2,5,62,67,102],"conventional":[3],"approaches":[4,25],"spoken":[6],"language":[7,20,83,120,167],"understanding":[8,21],"(SLU)":[9],"that":[10],"consist":[11],"of":[12,43,73,106,137],"cascading":[13],"a":[14,18],"speech":[15,33,48],"recognizer":[16],"with":[17,125],"natural":[19],"component,":[22],"end-to-end":[23],"(E2E)":[24],"for":[26,47,55,91,115,141,169],"SLU":[27,58,75,95,108,143],"infer":[28],"semantics":[29],"directly":[30],"from":[31],"the":[32,44,52,56,69,81,104,118,135,157],"signal":[34],"without":[35],"processing":[36],"it":[37],"through":[38],"separate":[39],"subsystems.":[40],"Pretraining":[41],"part":[42],"E2E":[45,74,94,107,142],"models":[46,109],"recognition":[49],"before":[50],"finetuning":[51],"entire":[53],"model":[54],"target":[57,82,119],"task":[59],"has":[60],"proven":[61],"be":[63,89],"an":[64,93],"effective":[65],"method":[66],"address":[68],"increased":[70],"data":[71,114,129,168],"requirements":[72],"models.":[76],"However,":[77],"transcribed":[78,113],"corpora":[79],"in":[80,110,117],"and":[84,128],"domain":[85],"may":[86],"not":[87,163],"always":[88],"available":[90,152],"pretraining":[92,116,124],"model.":[96],"This":[97],"paper":[98],"proposes":[99],"two":[100,139,145],"strategies":[101],"improve":[103],"performance":[105],"scenarios":[111],"where":[112,154],"is":[121],"unavailable:":[122],"multilingual":[123],"mismatched":[126],"languages":[127],"augmentation":[130],"using":[131,164],"SpecAugment[1].":[132],"We":[133],"demonstrate":[134],"effectiveness":[136],"these":[138],"methods":[140],"on":[144],"datasets,":[146],"including":[147],"one":[148],"recently":[149],"released":[150],"publicly":[151],"dataset":[153],"we":[155],"surpass":[156],"best":[158],"previously":[159],"published":[160],"result":[161],"despite":[162],"any":[165],"matched":[166],"pretraining.":[170]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":10},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
