{"id":"https://openalex.org/W4387872937","doi":"https://doi.org/10.1109/mlsp55844.2023.10285964","title":"Compressing Wav2vec2 for Embedded Applications","display_name":"Compressing Wav2vec2 for Embedded Applications","publication_year":2023,"publication_date":"2023-09-17","ids":{"openalex":"https://openalex.org/W4387872937","doi":"https://doi.org/10.1109/mlsp55844.2023.10285964"},"language":"en","primary_location":{"id":"doi:10.1109/mlsp55844.2023.10285964","is_oa":false,"landing_page_url":"https://doi.org/10.1109/mlsp55844.2023.10285964","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE 33rd International Workshop on Machine Learning for Signal Processing (MLSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5074450413","display_name":"Oswaldo Ludwig","orcid":"https://orcid.org/0000-0001-5952-6195"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Oswaldo Ludwig","raw_affiliation_strings":["Cerence Inc., Guldensporenpark 32,Merelbeke,Belgium,9820"],"affiliations":[{"raw_affiliation_string":"Cerence Inc., Guldensporenpark 32,Merelbeke,Belgium,9820","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5057752299","display_name":"Tom Claes","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tom Claes","raw_affiliation_strings":["Cerence Inc., Guldensporenpark 32,Merelbeke,Belgium,9820"],"affiliations":[{"raw_affiliation_string":"Cerence Inc., Guldensporenpark 32,Merelbeke,Belgium,9820","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5074450413"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.2473,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.70174904,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10515","display_name":"Cancer-related molecular mechanisms research","score":0.9873999953269958,"subfield":{"id":"https://openalex.org/subfields/1306","display_name":"Cancer Research"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10515","display_name":"Cancer-related molecular mechanisms research","score":0.9873999953269958,"subfield":{"id":"https://openalex.org/subfields/1306","display_name":"Cancer Research"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T12676","display_name":"Machine Learning and ELM","score":0.9613999724388123,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9304999709129333,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7926262617111206},{"id":"https://openalex.org/keywords/pruning","display_name":"Pruning","score":0.7234510183334351},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.5983990430831909},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.5611403584480286},{"id":"https://openalex.org/keywords/approximation-error","display_name":"Approximation error","score":0.536561906337738},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.5123913884162903},{"id":"https://openalex.org/keywords/memory-footprint","display_name":"Memory footprint","score":0.5020143985748291},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.46787023544311523},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.45286035537719727},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.41142746806144714},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.32021254301071167},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.14508885145187378},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.08372864127159119}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7926262617111206},{"id":"https://openalex.org/C108010975","wikidata":"https://www.wikidata.org/wiki/Q500094","display_name":"Pruning","level":2,"score":0.7234510183334351},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5983990430831909},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.5611403584480286},{"id":"https://openalex.org/C122383733","wikidata":"https://www.wikidata.org/wiki/Q865920","display_name":"Approximation error","level":2,"score":0.536561906337738},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.5123913884162903},{"id":"https://openalex.org/C74912251","wikidata":"https://www.wikidata.org/wiki/Q6815727","display_name":"Memory footprint","level":2,"score":0.5020143985748291},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.46787023544311523},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.45286035537719727},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.41142746806144714},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.32021254301071167},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.14508885145187378},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.08372864127159119},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C6557445","wikidata":"https://www.wikidata.org/wiki/Q173113","display_name":"Agronomy","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/mlsp55844.2023.10285964","is_oa":false,"landing_page_url":"https://doi.org/10.1109/mlsp55844.2023.10285964","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE 33rd International Workshop on Machine Learning for Signal Processing (MLSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.6299999952316284,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W1915251500","https://openalex.org/W2011967084","https://openalex.org/W2126811106","https://openalex.org/W2294543795","https://openalex.org/W2896457183","https://openalex.org/W2946417913","https://openalex.org/W2999905431","https://openalex.org/W3007007518","https://openalex.org/W3015720739","https://openalex.org/W3036601975","https://openalex.org/W3038012435","https://openalex.org/W3088144810","https://openalex.org/W3093579165","https://openalex.org/W3097777922","https://openalex.org/W3197580070","https://openalex.org/W3198429080","https://openalex.org/W3198771897","https://openalex.org/W3203140070","https://openalex.org/W4206375145","https://openalex.org/W4308359019","https://openalex.org/W6640059789","https://openalex.org/W6755207826","https://openalex.org/W6762945437","https://openalex.org/W6777017071","https://openalex.org/W6780226713","https://openalex.org/W6784614252","https://openalex.org/W6811289971"],"related_works":["https://openalex.org/W2373300491","https://openalex.org/W4390516098","https://openalex.org/W2378744544","https://openalex.org/W2594301978","https://openalex.org/W2379704676","https://openalex.org/W2181948922","https://openalex.org/W1998810860","https://openalex.org/W4206442282","https://openalex.org/W2384505857","https://openalex.org/W2355171581"],"abstract_inverted_index":{"Wav2vec2":[0,17,85],"self-supervised":[1],"multilingual":[2],"training":[3],"learns":[4],"speech":[5],"units":[6],"common":[7],"to":[8,12,37,69,100],"multiple":[9],"languages,":[10],"leading":[11],"better":[13],"generalization":[14],"capacity.":[15],"However,":[16],"is":[18,36],"larger":[19],"than":[20],"other":[21],"E2E":[22],"ASR":[23],"models":[24],"such":[25],"as":[26],"the":[27,31,39,46,51,61,71,84,102,136],"Conformer":[28],"ASR.":[29],"Therefore,":[30],"objective":[32],"of":[33,50,60,83,120,127,135],"this":[34,131],"work":[35],"reduce":[38],"Wav2vec":[40],"footprint":[41],"by":[42,143],"pruning":[43,126,139],"lines":[44],"from":[45],"intermediate":[47],"dense":[48],"layers":[49],"encoder":[52,62],"block,":[53],"since":[54],"they":[55],"represent":[56],"about":[57],"two":[58],"thirds":[59],"parameters.":[63],"We":[64],"apply":[65],"Genetic":[66],"Algorithms":[67],"(GA)":[68],"solve":[70],"combinatorial":[72],"optimization":[73],"problem":[74],"associated":[75],"with":[76,107,133],"pruning,":[77],"which":[78],"means":[79],"running":[80],"many":[81],"copies":[82],"decoder":[86],"in":[87],"parallel":[88],"using":[89],"multiprocessing":[90],"on":[91],"a":[92,113,125],"computer":[93],"grid,":[94],"so":[95],"an":[96],"effort":[97],"was":[98],"made":[99],"optimize":[101],"GA":[103],"for":[104,124],"good":[105],"performance":[106],"few":[108],"CPUs.":[109],"The":[110],"experiments":[111],"show":[112],"small":[114],"absolute":[115],"word":[116],"error":[117],"rate":[118],"damage":[119],"0.21%":[121],"(1.26%":[122],"relative)":[123],"40%":[128],"and":[129,140],"compare":[130],"value":[132,145],"those":[134],"usual":[137],"L1-norm":[138],"model":[141],"restructuring":[142],"singular":[144],"decomposition.":[146]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
