{"id":"https://openalex.org/W7140272158","doi":"https://doi.org/10.48550/arxiv.2603.23048","title":"MSR-HuBERT: Self-supervised Pre-training for Adaptation to Multiple Sampling Rates","display_name":"MSR-HuBERT: Self-supervised Pre-training for Adaptation to Multiple Sampling Rates","publication_year":2026,"publication_date":"2026-03-24","ids":{"openalex":"https://openalex.org/W7140272158","doi":"https://doi.org/10.48550/arxiv.2603.23048"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.23048","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.23048","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.23048","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5125672542","display_name":"Zikang Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Huang, Zikang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130613419","display_name":"Meng Ge","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ge, Meng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130552590","display_name":"Tianrui Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Tianrui","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058869295","display_name":"Xuanchen Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Xuanchen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130563097","display_name":"Xiaobao Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Xiaobao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130543931","display_name":"Longbiao Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Longbiao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5122987540","display_name":"Jianwu Dang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dang, Jianwu","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5125672542"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.8823999762535095,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.8823999762535095,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.04639999940991402,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.017500000074505806,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/upsampling","display_name":"Upsampling","score":0.9161999821662903},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.5536999702453613},{"id":"https://openalex.org/keywords/raw-data","display_name":"Raw data","score":0.3849000036716461},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.36880001425743103},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.35089999437332153},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.33160001039505005},{"id":"https://openalex.org/keywords/decimation","display_name":"Decimation","score":0.3257000148296356}],"concepts":[{"id":"https://openalex.org/C110384440","wikidata":"https://www.wikidata.org/wiki/Q1143270","display_name":"Upsampling","level":3,"score":0.9161999821662903},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7415000200271606},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.5536999702453613},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.507099986076355},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.41350001096725464},{"id":"https://openalex.org/C132964779","wikidata":"https://www.wikidata.org/wiki/Q2110223","display_name":"Raw data","level":2,"score":0.3849000036716461},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.36880001425743103},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.35089999437332153},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.33160001039505005},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.329800009727478},{"id":"https://openalex.org/C173642442","wikidata":"https://www.wikidata.org/wiki/Q1253346","display_name":"Decimation","level":3,"score":0.3257000148296356},{"id":"https://openalex.org/C88516994","wikidata":"https://www.wikidata.org/wiki/Q1268863","display_name":"Dynamic time warping","level":2,"score":0.32170000672340393},{"id":"https://openalex.org/C2781395549","wikidata":"https://www.wikidata.org/wiki/Q4680762","display_name":"Adaptive sampling","level":3,"score":0.30880001187324524},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.29409998655319214},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.28360000252723694},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.27239999175071716},{"id":"https://openalex.org/C157202957","wikidata":"https://www.wikidata.org/wiki/Q1659609","display_name":"Image warping","level":2,"score":0.2711000144481659},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2678000032901764},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.26570001244544983},{"id":"https://openalex.org/C119666444","wikidata":"https://www.wikidata.org/wiki/Q5977280","display_name":"Temporal resolution","level":2,"score":0.25529998540878296}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.23048","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.23048","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.23048","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.23048","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.4820372462272644,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Self-supervised":[0],"learning":[1],"(SSL)":[2],"has":[3],"advanced":[4],"speech":[5,9,89,93],"processing.":[6],"However,":[7],"existing":[8,113],"SSL":[10],"methods":[11],"typically":[12],"assume":[13],"a":[14,35,50,64],"single":[15],"sampling":[16,61],"rate":[17],"and":[18,76,91,109,115],"struggle":[19],"with":[20,49],"mixed-rate":[21,74],"data":[22],"due":[23],"to":[24,63,82],"temporal":[25,66],"resolution":[26,67],"mismatch.":[27],"To":[28],"address":[29],"this":[30],"limitation,":[31],"we":[32,43],"propose":[33],"MSRHuBERT,":[34],"multi-sampling-rate":[36,51],"adaptive":[37,52],"pre-training":[38,75],"method.":[39],"Building":[40],"on":[41,88],"HuBERT,":[42],"replace":[44],"its":[45],"single-rate":[46],"downsampling":[47,53],"CNN":[48,54],"that":[55,117],"maps":[56],"raw":[57],"waveforms":[58],"from":[59],"different":[60],"rates":[62],"shared":[65],"without":[68],"resampling.":[69],"This":[70],"design":[71],"enables":[72],"unified":[73],"fine-tuning.":[77],"In":[78],"experiments":[79],"spanning":[80],"16":[81],"48":[83],"kHz,":[84],"MSRHuBERT":[85,104],"outperforms":[86],"HuBERT":[87,121],"recognition":[90],"full-band":[92],"reconstruction,":[94],"preserving":[95],"high-frequency":[96],"detail":[97],"while":[98],"modeling":[99],"low-frequency":[100],"semantic":[101],"structure.":[102],"Moreover,":[103],"retains":[105],"HuBERT's":[106],"mask-prediction":[107],"objective":[108],"Transformer":[110],"encoder,":[111],"so":[112],"analyses":[114],"improvements":[116],"were":[118],"developed":[119],"for":[120],"can":[122],"apply":[123],"directly.":[124]},"counts_by_year":[],"updated_date":"2026-03-26T06:10:45.909354","created_date":"2026-03-26T00:00:00"}
