{"id":"https://openalex.org/W4408355587","doi":"https://doi.org/10.1109/icassp49660.2025.10889111","title":"Rethinking Mamba in Speech Processing by Self-Supervised Models","display_name":"Rethinking Mamba in Speech Processing by Self-Supervised Models","publication_year":2025,"publication_date":"2025-03-12","ids":{"openalex":"https://openalex.org/W4408355587","doi":"https://doi.org/10.1109/icassp49660.2025.10889111"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49660.2025.10889111","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10889111","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100362464","display_name":"Xiangyu Zhang","orcid":"https://orcid.org/0000-0003-0876-6783"},"institutions":[{"id":"https://openalex.org/I31746571","display_name":"UNSW Sydney","ror":"https://ror.org/03r8z3t63","country_code":"AU","type":"education","lineage":["https://openalex.org/I31746571"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Xiangyu Zhang","raw_affiliation_strings":["The University of New South Wales"],"affiliations":[{"raw_affiliation_string":"The University of New South Wales","institution_ids":["https://openalex.org/I31746571"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102822627","display_name":"Jianbo Ma","orcid":"https://orcid.org/0000-0002-6765-9462"},"institutions":[{"id":"https://openalex.org/I31746571","display_name":"UNSW Sydney","ror":"https://ror.org/03r8z3t63","country_code":"AU","type":"education","lineage":["https://openalex.org/I31746571"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Jianbo Ma","raw_affiliation_strings":["The University of New South Wales"],"affiliations":[{"raw_affiliation_string":"The University of New South Wales","institution_ids":["https://openalex.org/I31746571"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073612124","display_name":"Mostafa Shahin","orcid":"https://orcid.org/0000-0002-1091-8531"},"institutions":[{"id":"https://openalex.org/I31746571","display_name":"UNSW Sydney","ror":"https://ror.org/03r8z3t63","country_code":"AU","type":"education","lineage":["https://openalex.org/I31746571"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Mostafa Shahin","raw_affiliation_strings":["The University of New South Wales"],"affiliations":[{"raw_affiliation_string":"The University of New South Wales","institution_ids":["https://openalex.org/I31746571"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077436139","display_name":"Beena Ahmed","orcid":"https://orcid.org/0000-0002-1240-6572"},"institutions":[{"id":"https://openalex.org/I31746571","display_name":"UNSW Sydney","ror":"https://ror.org/03r8z3t63","country_code":"AU","type":"education","lineage":["https://openalex.org/I31746571"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Beena Ahmed","raw_affiliation_strings":["The University of New South Wales"],"affiliations":[{"raw_affiliation_string":"The University of New South Wales","institution_ids":["https://openalex.org/I31746571"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5030922449","display_name":"Julien Epps","orcid":"https://orcid.org/0000-0001-6624-5551"},"institutions":[{"id":"https://openalex.org/I31746571","display_name":"UNSW Sydney","ror":"https://ror.org/03r8z3t63","country_code":"AU","type":"education","lineage":["https://openalex.org/I31746571"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Julien Epps","raw_affiliation_strings":["The University of New South Wales"],"affiliations":[{"raw_affiliation_string":"The University of New South Wales","institution_ids":["https://openalex.org/I31746571"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100362464"],"corresponding_institution_ids":["https://openalex.org/I31746571"],"apc_list":null,"apc_paid":null,"fwci":7.8715,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.97375344,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9656000137329102,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9656000137329102,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7072870135307312},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5930871367454529},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.4416344463825226},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3636365234851837},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3350565433502197}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7072870135307312},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5930871367454529},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.4416344463825226},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3636365234851837},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3350565433502197}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49660.2025.10889111","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10889111","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320334704","display_name":"Australian Research Council","ror":"https://ror.org/05mmh0f86"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":34,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W2766219058","https://openalex.org/W2896457183","https://openalex.org/W2933138175","https://openalex.org/W2962780374","https://openalex.org/W2996320484","https://openalex.org/W3094502228","https://openalex.org/W3097777922","https://openalex.org/W3197580070","https://openalex.org/W3206996142","https://openalex.org/W3209059054","https://openalex.org/W4226380987","https://openalex.org/W4372267368","https://openalex.org/W4375869259","https://openalex.org/W4385245566","https://openalex.org/W4392902746","https://openalex.org/W4399168695","https://openalex.org/W4404783176","https://openalex.org/W4406322685","https://openalex.org/W4406461266","https://openalex.org/W4406461620","https://openalex.org/W4408352482","https://openalex.org/W4410087476","https://openalex.org/W6733862737","https://openalex.org/W6752051073","https://openalex.org/W6803444062","https://openalex.org/W6859298233","https://openalex.org/W6860915106","https://openalex.org/W6861387779","https://openalex.org/W6861744467","https://openalex.org/W6862616779","https://openalex.org/W6863493712","https://openalex.org/W6872209108","https://openalex.org/W6872665990"],"related_works":["https://openalex.org/W3204019825","https://openalex.org/W2981428355","https://openalex.org/W1834994814","https://openalex.org/W2041273198","https://openalex.org/W1599055764","https://openalex.org/W2131711534","https://openalex.org/W2149163000","https://openalex.org/W2962858469","https://openalex.org/W2289873871","https://openalex.org/W2559040841"],"abstract_inverted_index":{"The":[0],"Mamba-based":[1,26,75,109,130],"model":[2,46,76],"has":[3],"demonstrated":[4],"outstanding":[5],"performance":[6,28,65,142],"across":[7,30],"tasks":[8,36,54,80],"in":[9,19,35,78,124],"computer":[10],"vision,":[11],"natural":[12],"language":[13],"processing,":[14,24],"and":[15,41,133],"speech":[16,23,39,56,82],"processing.":[17,83],"However,":[18,52,84],"the":[20,25,44,64,71,74,98,107,120,134,140],"realm":[21],"of":[22,66,122],"model\u2019s":[27,141],"varies":[29],"different":[31],"tasks.":[32],"For":[33],"instance,":[34],"such":[37,88],"as":[38,89],"enhancement":[40],"spectrum":[42],"reconstruction,":[43],"Mamba":[45],"performs":[47],"well":[48],"when":[49],"used":[50],"independently.":[51],"for":[53,85],"like":[55],"recognition,":[57],"additional":[58,92],"modules":[59,93],"are":[60,94],"required":[61],"to":[62,96],"surpass":[63],"attention-based":[67],"models.":[68],"We":[69,127],"propose":[70],"hypothesis":[72],"that":[73],"excels":[77],"\"reconstruction\"":[79,99],"within":[81],"\"classification":[86],"tasks\"":[87],"Speech":[90,110],"Recognition,":[91],"necessary":[95],"accomplish":[97],"step.":[100],"To":[101],"validate":[102],"our":[103,125,145],"hypothesis,":[104],"we":[105,118],"analyze":[106],"previous":[108],"Models":[111],"from":[112],"an":[113],"information":[114,136],"theory":[115],"perspective.":[116],"Furthermore,":[117],"leveraged":[119],"properties":[121],"HuBERT":[123,131],"study.":[126],"trained":[128],"a":[129],"model,":[132],"mutual":[135],"patterns,":[137],"along":[138],"with":[139],"metrics,":[143],"confirmed":[144],"assumptions.":[146]},"counts_by_year":[{"year":2025,"cited_by_count":5}],"updated_date":"2025-12-19T19:40:27.379048","created_date":"2025-10-10T00:00:00"}
