{"id":"https://openalex.org/W4375869113","doi":"https://doi.org/10.1109/icassp49357.2023.10094922","title":"CTCBERT: Advancing Hidden-Unit Bert with CTC Objectives","display_name":"CTCBERT: Advancing Hidden-Unit Bert with CTC Objectives","publication_year":2023,"publication_date":"2023-05-05","ids":{"openalex":"https://openalex.org/W4375869113","doi":"https://doi.org/10.1109/icassp49357.2023.10094922"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49357.2023.10094922","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10094922","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5080637906","display_name":"Ruchao Fan","orcid":"https://orcid.org/0000-0001-5021-2747"},"institutions":[{"id":"https://openalex.org/I161318765","display_name":"University of California, Los Angeles","ror":"https://ror.org/046rm7j60","country_code":"US","type":"education","lineage":["https://openalex.org/I161318765"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Ruchao Fan","raw_affiliation_strings":["University of California,Los Angeles"],"affiliations":[{"raw_affiliation_string":"University of California,Los Angeles","institution_ids":["https://openalex.org/I161318765"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100377935","display_name":"Yiming Wang","orcid":"https://orcid.org/0000-0002-5932-4371"},"institutions":[{"id":"https://openalex.org/I4210105678","display_name":"Microsoft (Finland)","ror":"https://ror.org/01nehjf29","country_code":"FI","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210105678"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Yiming Wang","raw_affiliation_strings":["Microsoft Corporation"],"affiliations":[{"raw_affiliation_string":"Microsoft Corporation","institution_ids":["https://openalex.org/I4210105678"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034136587","display_name":"Yashesh Gaur","orcid":null},"institutions":[{"id":"https://openalex.org/I4210105678","display_name":"Microsoft (Finland)","ror":"https://ror.org/01nehjf29","country_code":"FI","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210105678"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Yashesh Gaur","raw_affiliation_strings":["Microsoft Corporation"],"affiliations":[{"raw_affiliation_string":"Microsoft Corporation","institution_ids":["https://openalex.org/I4210105678"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100365053","display_name":"Jinyu Li","orcid":"https://orcid.org/0000-0002-1089-9748"},"institutions":[{"id":"https://openalex.org/I4210105678","display_name":"Microsoft (Finland)","ror":"https://ror.org/01nehjf29","country_code":"FI","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210105678"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Jinyu Li","raw_affiliation_strings":["Microsoft Corporation"],"affiliations":[{"raw_affiliation_string":"Microsoft Corporation","institution_ids":["https://openalex.org/I4210105678"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5080637906"],"corresponding_institution_ids":["https://openalex.org/I161318765"],"apc_list":null,"apc_paid":null,"fwci":0.8728,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.77890904,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/connectionism","display_name":"Connectionism","score":0.7632830739021301},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7420321702957153},{"id":"https://openalex.org/keywords/blank","display_name":"Blank","score":0.5430859923362732},{"id":"https://openalex.org/keywords/cross-entropy","display_name":"Cross entropy","score":0.48904284834861755},{"id":"https://openalex.org/keywords/unit","display_name":"Unit (ring theory)","score":0.4879297912120819},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.47841960191726685},{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.46590906381607056},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.45744961500167847},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.4448162913322449},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4191742241382599},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.38264864683151245},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3029613494873047},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.2982943058013916},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.1142871081829071},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.0915488600730896},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.08328643441200256}],"concepts":[{"id":"https://openalex.org/C8521452","wikidata":"https://www.wikidata.org/wiki/Q203790","display_name":"Connectionism","level":3,"score":0.7632830739021301},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7420321702957153},{"id":"https://openalex.org/C2778089247","wikidata":"https://www.wikidata.org/wiki/Q368951","display_name":"Blank","level":2,"score":0.5430859923362732},{"id":"https://openalex.org/C167981619","wikidata":"https://www.wikidata.org/wiki/Q1685498","display_name":"Cross entropy","level":3,"score":0.48904284834861755},{"id":"https://openalex.org/C122637931","wikidata":"https://www.wikidata.org/wiki/Q118084","display_name":"Unit (ring theory)","level":2,"score":0.4879297912120819},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.47841960191726685},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.46590906381607056},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.45744961500167847},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.4448162913322449},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4191742241382599},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.38264864683151245},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3029613494873047},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.2982943058013916},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.1142871081829071},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0915488600730896},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.08328643441200256},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C145420912","wikidata":"https://www.wikidata.org/wiki/Q853077","display_name":"Mathematics education","level":1,"score":0.0},{"id":"https://openalex.org/C78519656","wikidata":"https://www.wikidata.org/wiki/Q101333","display_name":"Mechanical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49357.2023.10094922","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10094922","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":34,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W2127141656","https://openalex.org/W2514741789","https://openalex.org/W2888867175","https://openalex.org/W2889068726","https://openalex.org/W2896457183","https://openalex.org/W2908510526","https://openalex.org/W2933138175","https://openalex.org/W2953190524","https://openalex.org/W2972943112","https://openalex.org/W2973049979","https://openalex.org/W2982223350","https://openalex.org/W3015265920","https://openalex.org/W3016011332","https://openalex.org/W3036601975","https://openalex.org/W3041561163","https://openalex.org/W3097286738","https://openalex.org/W3160345865","https://openalex.org/W3204696009","https://openalex.org/W3209059054","https://openalex.org/W3209984917","https://openalex.org/W4224934179","https://openalex.org/W4226278833","https://openalex.org/W4226507725","https://openalex.org/W4281492411","https://openalex.org/W4281672148","https://openalex.org/W4283324001","https://openalex.org/W4287889722","https://openalex.org/W4293793697","https://openalex.org/W4297841844","https://openalex.org/W6755207826","https://openalex.org/W6757817989","https://openalex.org/W6780218876","https://openalex.org/W6838909421"],"related_works":["https://openalex.org/W2361638505","https://openalex.org/W2370352440","https://openalex.org/W2009954581","https://openalex.org/W4296141694","https://openalex.org/W2379220204","https://openalex.org/W4249926107","https://openalex.org/W2005071119","https://openalex.org/W3158546193","https://openalex.org/W4385386361","https://openalex.org/W2994927414"],"abstract_inverted_index":{"In":[0],"this":[1],"work,":[2],"we":[3],"present":[4],"a":[5,18],"simple":[6],"but":[7],"effective":[8],"method,":[9],"CTCBERT,":[10],"for":[11],"advancing":[12],"hidden-unit":[13],"BERT":[14],"(HuBERT).":[15],"HuBERT":[16,94,96,137],"applies":[17],"frame-level":[19],"cross-entropy":[20],"(CE)":[21],"loss,":[22],"which":[23],"is":[24],"similar":[25],"to":[26,107],"most":[27],"acoustic":[28],"model":[29,35],"training.":[30,110],"However,":[31],"CTCBERT":[32,90,133],"performs":[33],"the":[34,38,56,108,124,128],"training":[36,102],"with":[37,79],"Connectionist":[39],"Temporal":[40],"Classification":[41],"(CTC)":[42],"objective":[43],"after":[44],"removing":[45],"duplicated":[46],"IDs":[47,92],"in":[48,64],"each":[49],"masked":[50],"region.":[51],"The":[52,100],"idea":[53],"stems":[54],"from":[55,93],"observation":[57],"that":[58,77],"there":[59],"can":[60,81],"be":[61,82],"significant":[62],"errors":[63],"alignments":[65,74],"when":[66,85,112],"using":[67],"clustered":[68],"or":[69],"aligned":[70],"IDs.":[71],"CTC":[72,80,101],"learns":[73],"implicitly,":[75],"indicating":[76],"learning":[78],"more":[83],"flexible":[84],"misalignment":[86],"exists.":[87],"We":[88],"examine":[89],"on":[91,123,140],"Iter1,":[95],"Iter2,":[97],"and":[98,138],"PBERT.":[99],"brings":[103],"consistent":[104],"improvements":[105,119,131],"compared":[106],"CE":[109],"Furthermore,":[111],"loading":[113],"blank-related":[114],"parameters":[115],"during":[116],"finetuning,":[117],"slight":[118],"are":[120,134],"observed.":[121],"Evaluated":[122],"Librispeech":[125],"960-100h":[126],"setting,":[127],"relative":[129],"WER":[130],"of":[132],"2%-11%":[135],"over":[136],"PERT":[139],"test-other":[141],"data.":[142]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
