{"id":"https://openalex.org/W6948633898","doi":"https://doi.org/10.5075/epfl-thesis-10616","title":"Low-Resource Speech Recognition and Understanding for Challenging Applications","display_name":"Low-Resource Speech Recognition and Understanding for Challenging Applications","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W6948633898","doi":"https://doi.org/10.5075/epfl-thesis-10616"},"language":"en","primary_location":{"id":"pmh:oai:infoscience.epfl.ch:20.500.14299/240713","is_oa":true,"landing_page_url":"https://infoscience.epfl.ch/handle/20.500.14299/240713","pdf_url":"https://infoscience.epfl.ch/bitstreams/5478b4ac-e381-420a-9be2-3baa35f8aba4/download","source":{"id":"https://openalex.org/S4306400487","display_name":"Infoscience (Ecole Polytechnique F\u00e9d\u00e9rale de Lausanne)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"THESIS","raw_type":"doctoral thesis"},"type":"dissertation","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://infoscience.epfl.ch/bitstreams/5478b4ac-e381-420a-9be2-3baa35f8aba4/download","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Zuluaga Gomez, Juan Pablo","orcid":null},"institutions":[{"id":"https://openalex.org/I5124864","display_name":"\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne","ror":"https://ror.org/02s376052","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I5124864"]}],"countries":["CH"],"is_corresponding":true,"raw_author_name":"Zuluaga Gomez, Juan Pablo","raw_affiliation_strings":["Ecole polytechnique f\u00e9d\u00e9rale de Lausanne"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Ecole polytechnique f\u00e9d\u00e9rale de Lausanne","institution_ids":["https://openalex.org/I5124864"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I5124864"],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T10895","display_name":"Species Distribution and Climate Change","score":0.32659998536109924,"subfield":{"id":"https://openalex.org/subfields/2302","display_name":"Ecological Modeling"},"field":{"id":"https://openalex.org/fields/23","display_name":"Environmental Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10895","display_name":"Species Distribution and Climate Change","score":0.32659998536109924,"subfield":{"id":"https://openalex.org/subfields/2302","display_name":"Ecological Modeling"},"field":{"id":"https://openalex.org/fields/23","display_name":"Environmental Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13124","display_name":"Chemical synthesis and alkaloids","score":0.12489999830722809,"subfield":{"id":"https://openalex.org/subfields/1605","display_name":"Organic Chemistry"},"field":{"id":"https://openalex.org/fields/16","display_name":"Chemistry"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13130","display_name":"Biological and pharmacological studies of plants","score":0.06239999830722809,"subfield":{"id":"https://openalex.org/subfields/2736","display_name":"Pharmacology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/filter","display_name":"Filter (signal processing)","score":0.2784000039100647},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.2703999876976013},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.2603999972343445},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.25040000677108765},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.24500000476837158},{"id":"https://openalex.org/keywords/term","display_name":"Term (time)","score":0.24210000038146973}],"concepts":[{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.31520000100135803},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.2784000039100647},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.2703999876976013},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.2603999972343445},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.25040000677108765},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.24500000476837158},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.24210000038146973},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.23469999432563782},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.23389999568462372},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.22579999268054962}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:infoscience.epfl.ch:20.500.14299/240713","is_oa":true,"landing_page_url":"https://infoscience.epfl.ch/handle/20.500.14299/240713","pdf_url":"https://infoscience.epfl.ch/bitstreams/5478b4ac-e381-420a-9be2-3baa35f8aba4/download","source":{"id":"https://openalex.org/S4306400487","display_name":"Infoscience (Ecole Polytechnique F\u00e9d\u00e9rale de Lausanne)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"THESIS","raw_type":"doctoral thesis"},{"id":"doi:10.5075/epfl-thesis-10616","is_oa":true,"landing_page_url":"https://doi.org/10.5075/epfl-thesis-10616","pdf_url":null,"source":{"id":"https://openalex.org/S4306400488","display_name":"Infoscience (Ecole Polytechnique F\u00e9d\u00e9rale de Lausanne)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"Dissertation"}],"best_oa_location":{"id":"pmh:oai:infoscience.epfl.ch:20.500.14299/240713","is_oa":true,"landing_page_url":"https://infoscience.epfl.ch/handle/20.500.14299/240713","pdf_url":"https://infoscience.epfl.ch/bitstreams/5478b4ac-e381-420a-9be2-3baa35f8aba4/download","source":{"id":"https://openalex.org/S4306400487","display_name":"Infoscience (Ecole Polytechnique F\u00e9d\u00e9rale de Lausanne)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"THESIS","raw_type":"doctoral thesis"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.5812042951583862,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W6948633898.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Automatic":[0],"speech":[1,25],"recognition":[2],"(ASR)":[3],"and":[4,21,29,61,114,136,160,177,200,216,229,256,286,294,312,321],"spoken":[5],"language":[6,34],"understanding":[7,35],"(SLU)":[8],"is":[9,120,226],"the":[10,52,134,142,195,227,250,261,272,279],"core":[11],"component":[12],"of":[13,97,138,144,168],"current":[14,76],"voice-powered":[15],"AI":[16],"assistants":[17],"such":[18,132,147,275],"as":[19,133,148,276],"Siri":[20],"Alexa.":[22],"It":[23],"involves":[24],"transcription":[26],"with":[27,32,54,72],"ASR":[28,48,102,202,235,264,320],"its":[30],"comprehension":[31],"natural":[33],"(NLU)":[36],"systems.":[37],"Traditionally,":[38],"SLU":[39,98,273,289,322],"runs":[40],"on":[41,69,93,198,278],"a":[42,94,105,326],"cascaded":[43,284],"setting,":[44],"where":[45],"an":[46,121,302],"in-domain":[47],"system":[49],"automatically":[50],"generates":[51],"transcripts":[53],"valuable":[55],"semantic":[56],"information,":[57],"e.g.,":[58],"named":[59],"entities":[60],"intents.":[62],"These":[63],"components":[64],"have":[65,78,89,128],"been":[66],"generally":[67],"based":[68],"statistical":[70],"approaches":[71],"hand-craft":[73],"features.":[74],"However,":[75],"trends":[77],"shifted":[79],"towards":[80],"large-scale":[81,139],"end-to-end":[82],"(E2E)":[83],"deep":[84],"neural":[85],"networks":[86],"(DNN),":[87],"which":[88,219],"shown":[90],"superior":[91],"performance":[92],"wide":[95],"range":[96],"tasks.":[99],"For":[100],"example,":[101],"has":[103],"seen":[104],"rapid":[106],"transition":[107],"from":[108,185,237,271,292],"traditional":[109],"hybrid-based":[110,199],"modeling":[111],"to":[112,171,180,193,209,221,232,282],"encoder-decoder":[113],"Transducer-based":[115],"modeling.":[116],"Even":[117],"though":[118],"there":[119],"undeniable":[122],"improvement":[123],"in":[124,301,325],"performance,":[125],"other":[126,288],"challenges":[127,182],"come":[129],"into":[130],"play,":[131],"urgency":[135],"need":[137,143],"supervised":[140,243],"datasets;":[141],"additional":[145],"modalities,":[146],"contextual":[149,212],"knowledge;":[150],"massive":[151],"GPU":[152],"clusters":[153],"for":[154,164,239],"training":[155,217,255],"large":[156,162],"models;":[157],"or":[158],"high-performance":[159],"robust":[161],"models":[163,236],"complex":[165,186],"applications.":[166],"All":[167],"this":[169,306],"leads":[170,220],"major":[172],"challenges.":[173],"This":[174],"dissertation":[175,307],"explores":[176],"propose":[178],"solutions":[179],"these":[181],"that":[183,297,317],"arise":[184],"settings.":[187],"Specifically,":[188],"we":[189,248,267],"address:":[190],"(1)":[191],"How":[192,208,246],"overcome":[194],"data":[196],"scarcity":[197],"E2E":[201,263,303],"models,":[203],"i.e.,":[204],"low-resource":[205],"applications?":[206],"(2)":[207],"properly":[210],"integrate":[211],"knowledge":[213],"at":[214,254],"decoding":[215],"time,":[218],"improved":[222],"models?":[223],"(3)":[224],"What":[225],"fastest":[228],"best":[230],"approach":[231],"train":[233],"streaming":[234],"scratch":[238],"challenging":[240],"domains":[241],"without":[242],"data?":[244],"(4)":[245],"do":[247],"reduce":[249],"computational":[251],"budget":[252],"required":[253],"inference":[257],"time":[258],"by":[259,309],"modifying":[260],"state-of-the-art":[262],"architectures?":[265],"Similarly,":[266],"target":[268],"some":[269],"questions":[270],"perspective,":[274],"analysis":[277],"optimal":[280],"representations":[281],"perform":[283],"SLU,":[285],"exploring":[287],"tasks":[290,323],"aside":[291],"intent":[293],"slot":[295],"filing":[296],"can":[298,318],"be":[299],"performed":[300],"fashion.":[304],"Finally,":[305],"closes":[308],"covering":[310],"STAC-ST":[311],"TokenVerse,":[313],"two":[314],"novel":[315],"architectures":[316],"handle":[319],"seamlessly":[324],"single":[327],"model":[328],"via":[329],"special":[330],"tokens.":[331]},"counts_by_year":[],"updated_date":"2026-07-01T06:00:48.157686","created_date":"2025-10-10T00:00:00"}
