{"id":"https://openalex.org/W4226507725","doi":"https://doi.org/10.21437/interspeech.2022-981","title":"Speech Pre-training with Acoustic Piece","display_name":"Speech Pre-training with Acoustic Piece","publication_year":2022,"publication_date":"2022-09-16","ids":{"openalex":"https://openalex.org/W4226507725","doi":"https://doi.org/10.21437/interspeech.2022-981"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2022-981","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2022-981","pdf_url":null,"source":{"id":"https://openalex.org/S4363604309","display_name":"Interspeech 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2022","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102205493","display_name":"Shuo Ren","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Shuo Ren","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101635405","display_name":"Shujie Liu","orcid":"https://orcid.org/0009-0008-0785-8882"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shujie Liu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100324098","display_name":"Yu Wu","orcid":"https://orcid.org/0000-0002-1680-8253"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu Wu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5106557565","display_name":"Long Zhou","orcid":"https://orcid.org/0009-0006-1919-4943"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Long Zhou","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5014662947","display_name":"Furu Wei","orcid":"https://orcid.org/0000-0002-7810-5852"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Furu Wei","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5102205493"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.155,"has_fulltext":false,"cited_by_count":11,"citation_normalized_percentile":{"value":0.79716981,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"2648","last_page":"2652"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9955000281333923,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7875146865844727},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6960257291793823},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.595604419708252},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5257676243782043},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.4741120934486389},{"id":"https://openalex.org/keywords/acoustic-model","display_name":"Acoustic model","score":0.4692756235599518},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.45186737179756165},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.4508233070373535},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.448697566986084},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.43160712718963623},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3885924518108368},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.38590654730796814},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.3109397292137146}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7875146865844727},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6960257291793823},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.595604419708252},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5257676243782043},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.4741120934486389},{"id":"https://openalex.org/C155635449","wikidata":"https://www.wikidata.org/wiki/Q4674699","display_name":"Acoustic model","level":3,"score":0.4692756235599518},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.45186737179756165},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.4508233070373535},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.448697566986084},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.43160712718963623},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3885924518108368},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.38590654730796814},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.3109397292137146},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2022-981","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2022-981","pdf_url":null,"source":{"id":"https://openalex.org/S4363604309","display_name":"Interspeech 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2022","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6200000047683716,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2121652828","https://openalex.org/W3033124456","https://openalex.org/W2992378684","https://openalex.org/W2964829415","https://openalex.org/W2122233706","https://openalex.org/W642007152","https://openalex.org/W2105439218","https://openalex.org/W2131711534","https://openalex.org/W3143423642","https://openalex.org/W2341426843"],"abstract_inverted_index":{"Previous":[0],"speech":[1,135],"pre-training":[2],"methods,":[3],"such":[4,132],"as":[5,34,114,133],"wav2vec2.0":[6],"and":[7,61,125],"HuBERT,":[8],"pre-train":[9,85],"a":[10],"Transformer":[11],"encoder":[12],"to":[13,58,79,83],"learn":[14],"deep":[15],"representations":[16],"from":[17,25,102],"audio":[18,124],"data,":[19],"with":[20,37,70],"objectives":[21],"predicting":[22],"either":[23],"elements":[24,45],"latent":[26],"vector":[27],"quantized":[28],"space":[29],"or":[30,46],"pre-generated":[31],"labels":[32],"(known":[33],"target":[35,64],"codes)":[36,47],"offline":[38],"clustering.":[39],"However,":[40],"those":[41,81],"training":[42,116],"signals":[43],"(quantized":[44],"are":[48,101],"independent":[49],"across":[50],"different":[51],"tokens":[52],"without":[53],"considering":[54,88],"their":[55],"relations.":[56],"According":[57],"our":[59,141],"observation":[60],"analysis,":[62],"the":[63,86,89,92,103,111,115,122,149],"codes":[65],"share":[66],"obvious":[67],"patterns":[68,82,95],"aligned":[69],"phonemized":[71],"text":[72],"data.":[73],"Based":[74],"on":[75,148],"that,":[76],"we":[77,96,118],"propose":[78],"leverage":[80],"better":[84],"model":[87],"relations":[90],"among":[91],"codes.":[93,109],"The":[94],"extracted,":[97],"called":[98],"\"acoustic":[99],"piece\"s,":[100],"sentence":[104],"piece":[105,113],"result":[106],"of":[107],"HuBERT":[108],"With":[110],"acoustic":[112],"signal,":[117],"can":[119],"implicitly":[120],"bridge":[121],"input":[123],"natural":[126],"language,":[127],"which":[128],"benefits":[129],"audio-to-text":[130],"tasks,":[131],"automatic":[134],"recognition":[136],"(ASR).":[137],"Simple":[138],"but":[139],"effective,":[140],"method":[142],"\"HuBERT-AP\"":[143],"significantly":[144],"outperforms":[145],"strong":[146],"baselines":[147],"LibriSpeech":[150],"ASR":[151],"task.":[152]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":1}],"updated_date":"2026-03-09T08:58:05.943551","created_date":"2025-10-10T00:00:00"}
