{"id":"https://openalex.org/W3094637009","doi":"https://doi.org/10.1109/icassp39728.2021.9414227","title":"Joint Masked CPC And CTC Training For ASR","display_name":"Joint Masked CPC And CTC Training For ASR","publication_year":2021,"publication_date":"2021-05-13","ids":{"openalex":"https://openalex.org/W3094637009","doi":"https://doi.org/10.1109/icassp39728.2021.9414227","mag":"3094637009"},"language":"en","primary_location":{"id":"doi:10.1109/icassp39728.2021.9414227","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp39728.2021.9414227","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2011.00093","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5074491874","display_name":"Chaitanya Talnikar","orcid":null},"institutions":[{"id":"https://openalex.org/I4210114444","display_name":"Meta (United States)","ror":"https://ror.org/01zbnvs85","country_code":"US","type":"company","lineage":["https://openalex.org/I4210114444"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Chaitanya Talnikar","raw_affiliation_strings":["Facebook AI Research,New York, Menlo Park &#x0026; Paris,USA &amp; France","Facebook AI Research,New York, Menlo Park & Paris,USA & France"],"affiliations":[{"raw_affiliation_string":"Facebook AI Research,New York, Menlo Park &#x0026; Paris,USA &amp; France","institution_ids":["https://openalex.org/I4210114444"]},{"raw_affiliation_string":"Facebook AI Research,New York, Menlo Park & Paris,USA & France","institution_ids":["https://openalex.org/I4210114444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5106542588","display_name":"Tatiana Likhomanenko","orcid":null},"institutions":[{"id":"https://openalex.org/I4210114444","display_name":"Meta (United States)","ror":"https://ror.org/01zbnvs85","country_code":"US","type":"company","lineage":["https://openalex.org/I4210114444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tatiana Likhomanenko","raw_affiliation_strings":["Facebook AI Research,New York, Menlo Park &#x0026; Paris,USA &amp; France","Facebook AI Research,New York, Menlo Park & Paris,USA & France"],"affiliations":[{"raw_affiliation_string":"Facebook AI Research,New York, Menlo Park &#x0026; Paris,USA &amp; France","institution_ids":["https://openalex.org/I4210114444"]},{"raw_affiliation_string":"Facebook AI Research,New York, Menlo Park & Paris,USA & France","institution_ids":["https://openalex.org/I4210114444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053915453","display_name":"Ronan Collobert","orcid":null},"institutions":[{"id":"https://openalex.org/I4210114444","display_name":"Meta (United States)","ror":"https://ror.org/01zbnvs85","country_code":"US","type":"company","lineage":["https://openalex.org/I4210114444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ronan Collobert","raw_affiliation_strings":["Facebook AI Research,New York, Menlo Park &#x0026; Paris,USA &amp; France","Facebook AI Research,New York, Menlo Park & Paris,USA & France"],"affiliations":[{"raw_affiliation_string":"Facebook AI Research,New York, Menlo Park &#x0026; Paris,USA &amp; France","institution_ids":["https://openalex.org/I4210114444"]},{"raw_affiliation_string":"Facebook AI Research,New York, Menlo Park & Paris,USA & France","institution_ids":["https://openalex.org/I4210114444"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5041907084","display_name":"Gabriel Synnaeve","orcid":"https://orcid.org/0000-0003-1715-3356"},"institutions":[{"id":"https://openalex.org/I4210114444","display_name":"Meta (United States)","ror":"https://ror.org/01zbnvs85","country_code":"US","type":"company","lineage":["https://openalex.org/I4210114444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Gabriel Synnaeve","raw_affiliation_strings":["Facebook AI Research,New York, Menlo Park &#x0026; Paris,USA &amp; France","Facebook AI Research,New York, Menlo Park & Paris,USA & France"],"affiliations":[{"raw_affiliation_string":"Facebook AI Research,New York, Menlo Park &#x0026; Paris,USA &amp; France","institution_ids":["https://openalex.org/I4210114444"]},{"raw_affiliation_string":"Facebook AI Research,New York, Menlo Park & Paris,USA & France","institution_ids":["https://openalex.org/I4210114444"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5074491874"],"corresponding_institution_ids":["https://openalex.org/I4210114444"],"apc_list":null,"apc_paid":null,"fwci":0.5644143,"has_fulltext":true,"cited_by_count":4,"citation_normalized_percentile":{"value":0.70917189,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"3045","last_page":"3049"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9948999881744385,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7961533665657043},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.635163426399231},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5758407711982727},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5373927354812622},{"id":"https://openalex.org/keywords/regularization","display_name":"Regularization (linguistics)","score":0.5365488529205322},{"id":"https://openalex.org/keywords/connectionism","display_name":"Connectionism","score":0.5292329788208008},{"id":"https://openalex.org/keywords/joint","display_name":"Joint (building)","score":0.5179449915885925},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.47642359137535095},{"id":"https://openalex.org/keywords/labeled-data","display_name":"Labeled data","score":0.4608457088470459},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.44582757353782654},{"id":"https://openalex.org/keywords/coding","display_name":"Coding (social sciences)","score":0.4363621473312378},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.37913233041763306},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.37041333317756653},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3584408164024353},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.16815680265426636}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7961533665657043},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.635163426399231},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5758407711982727},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5373927354812622},{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.5365488529205322},{"id":"https://openalex.org/C8521452","wikidata":"https://www.wikidata.org/wiki/Q203790","display_name":"Connectionism","level":3,"score":0.5292329788208008},{"id":"https://openalex.org/C18555067","wikidata":"https://www.wikidata.org/wiki/Q8375051","display_name":"Joint (building)","level":2,"score":0.5179449915885925},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.47642359137535095},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.4608457088470459},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.44582757353782654},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.4363621473312378},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.37913233041763306},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.37041333317756653},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3584408164024353},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.16815680265426636},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C170154142","wikidata":"https://www.wikidata.org/wiki/Q150737","display_name":"Architectural engineering","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.1109/icassp39728.2021.9414227","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp39728.2021.9414227","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2011.00093","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2011.00093","pdf_url":"https://arxiv.org/pdf/2011.00093","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"mag:3094637009","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/2011.00093.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.2011.00093","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2011.00093","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"},{"id":"doi:10.17023/md16-jy61","is_oa":true,"landing_page_url":"https://doi.org/10.17023/md16-jy61","pdf_url":null,"source":{"id":"https://openalex.org/S7407051697","display_name":"IEEE RESOURCE CENTERS","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2011.00093","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2011.00093","pdf_url":"https://arxiv.org/pdf/2011.00093","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.5299999713897705}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3094637009.pdf","grobid_xml":"https://content.openalex.org/works/W3094637009.grobid-xml"},"referenced_works_count":41,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W1522301498","https://openalex.org/W1993660824","https://openalex.org/W2127141656","https://openalex.org/W2193413348","https://openalex.org/W2510867321","https://openalex.org/W2526425061","https://openalex.org/W2755891984","https://openalex.org/W2803405196","https://openalex.org/W2842511635","https://openalex.org/W2936774411","https://openalex.org/W2940180244","https://openalex.org/W2952976827","https://openalex.org/W2953190524","https://openalex.org/W2963341956","https://openalex.org/W2963403868","https://openalex.org/W2991213871","https://openalex.org/W2995181338","https://openalex.org/W2996159613","https://openalex.org/W2998532468","https://openalex.org/W3005680577","https://openalex.org/W3026041220","https://openalex.org/W3027083471","https://openalex.org/W3034781633","https://openalex.org/W3036601975","https://openalex.org/W3039910566","https://openalex.org/W3096338464","https://openalex.org/W3099782249","https://openalex.org/W6631190155","https://openalex.org/W6687566353","https://openalex.org/W6739901393","https://openalex.org/W6744261651","https://openalex.org/W6755207826","https://openalex.org/W6768080748","https://openalex.org/W6770506093","https://openalex.org/W6771137614","https://openalex.org/W6772883055","https://openalex.org/W6774314701","https://openalex.org/W6780218876","https://openalex.org/W6780483730","https://openalex.org/W6947929050"],"related_works":["https://openalex.org/W3160235762","https://openalex.org/W3207925072","https://openalex.org/W3151744761","https://openalex.org/W3162249256","https://openalex.org/W3015356564","https://openalex.org/W2939710050","https://openalex.org/W3026842484","https://openalex.org/W2988736778","https://openalex.org/W2939069254","https://openalex.org/W3133090729","https://openalex.org/W3094965760","https://openalex.org/W3003809177","https://openalex.org/W3015810689","https://openalex.org/W2148182949","https://openalex.org/W3132108706","https://openalex.org/W2898132662","https://openalex.org/W3175621790","https://openalex.org/W3201659743","https://openalex.org/W3044483536","https://openalex.org/W2137820324"],"abstract_inverted_index":{"Self-supervised":[0],"learning":[1,7],"(SSL)":[2],"has":[3],"shown":[4],"promise":[5],"in":[6],"representations":[8],"of":[9,37],"audio":[10],"that":[11,40,75,107],"are":[12],"useful":[13],"for":[14,83,115],"automatic":[15],"speech":[16],"recognition":[17],"(ASR).":[18],"But,":[19],"training":[20,36,78],"SSL":[21],"models":[22,39],"like":[23],"wav2vec~2.0":[24,98],"requires":[25],"a":[26,34,113],"two-stage":[27],"pipeline.":[28],"In":[29],"this":[30,76],"paper":[31],"we":[32,50,105],"demonstrate":[33],"single-stage":[35],"ASR":[38,86],"can":[41],"utilize":[42],"both":[43],"unlabeled":[44],"and":[45,63],"labeled":[46],"data.":[47],"During":[48],"training,":[49],"alternately":[51],"minimize":[52],"two":[53],"losses:":[54],"an":[55],"unsupervised":[56,89],"masked":[57],"Contrastive":[58],"Predictive":[59],"Coding":[60],"(CPC)":[61],"loss":[62,68],"the":[64,84,100,109,116],"supervised":[65,117],"audio-to-text":[66],"alignment":[67],"Connectionist":[69],"Temporal":[70],"Classification":[71],"(CTC).":[72],"We":[73],"show":[74],"joint":[77],"method":[79],"directly":[80],"optimizes":[81],"performance":[82],"downstream":[85],"task":[87,111],"using":[88],"data":[90],"while":[91],"achieving":[92],"similar":[93],"word":[94],"error":[95],"rates":[96],"to":[97],"on":[99],"Librispeech":[101],"100-hour":[102],"dataset.":[103],"Finally,":[104],"postulate":[106],"solving":[108],"contrastive":[110],"is":[112],"regularization":[114],"CTC":[118],"loss.":[119]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":3}],"updated_date":"2026-02-09T09:26:11.010843","created_date":"2025-10-10T00:00:00"}
