{"id":"https://openalex.org/W4415214233","doi":"https://doi.org/10.3389/frcmn.2025.1662788","title":"Sentence-level consistency of conformer based pre-training distillation for Chinese speech recognition","display_name":"Sentence-level consistency of conformer based pre-training distillation for Chinese speech recognition","publication_year":2025,"publication_date":"2025-10-15","ids":{"openalex":"https://openalex.org/W4415214233","doi":"https://doi.org/10.3389/frcmn.2025.1662788"},"language":"en","primary_location":{"id":"doi:10.3389/frcmn.2025.1662788","is_oa":true,"landing_page_url":"https://doi.org/10.3389/frcmn.2025.1662788","pdf_url":"https://www.frontiersin.org/journals/communications-and-networks/articles/10.3389/frcmn.2025.1662788/pdf","source":{"id":"https://openalex.org/S4210213607","display_name":"Frontiers in Communications and Networks","issn_l":"2673-530X","issn":["2673-530X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320527","host_organization_name":"Frontiers Media","host_organization_lineage":["https://openalex.org/P4310320527"],"host_organization_lineage_names":["Frontiers Media"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Communications and Networks","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.frontiersin.org/journals/communications-and-networks/articles/10.3389/frcmn.2025.1662788/pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101528672","display_name":"Haifang Li","orcid":"https://orcid.org/0009-0004-9924-9603"},"institutions":[{"id":"https://openalex.org/I1334729051","display_name":"Xinjiang Normal University","ror":"https://ror.org/00ndrvk93","country_code":"CN","type":"education","lineage":["https://openalex.org/I1334729051"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Haifang Li","raw_affiliation_strings":["School of Computer Science and Technology, Xinjiang Normal University, Xinjiang, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Xinjiang Normal University, Xinjiang, China","institution_ids":["https://openalex.org/I1334729051"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101876190","display_name":"Chao Tang","orcid":"https://orcid.org/0000-0002-5881-1119"},"institutions":[{"id":"https://openalex.org/I1334729051","display_name":"Xinjiang Normal University","ror":"https://ror.org/00ndrvk93","country_code":"CN","type":"education","lineage":["https://openalex.org/I1334729051"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chao Tang","raw_affiliation_strings":["School of Computer Science and Technology, Xinjiang Normal University, Xinjiang, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Xinjiang Normal University, Xinjiang, China","institution_ids":["https://openalex.org/I1334729051"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101908372","display_name":"Xin Yue","orcid":"https://orcid.org/0000-0003-4202-3772"},"institutions":[{"id":"https://openalex.org/I1334729051","display_name":"Xinjiang Normal University","ror":"https://ror.org/00ndrvk93","country_code":"CN","type":"education","lineage":["https://openalex.org/I1334729051"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xin Yue","raw_affiliation_strings":["School of Computer Science and Technology, Xinjiang Normal University, Xinjiang, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Xinjiang Normal University, Xinjiang, China","institution_ids":["https://openalex.org/I1334729051"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100342424","display_name":"Xu Li","orcid":"https://orcid.org/0000-0001-8902-4989"},"institutions":[{"id":"https://openalex.org/I1334729051","display_name":"Xinjiang Normal University","ror":"https://ror.org/00ndrvk93","country_code":"CN","type":"education","lineage":["https://openalex.org/I1334729051"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xu Li","raw_affiliation_strings":["School of Computer Science and Technology, Xinjiang Normal University, Xinjiang, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Xinjiang Normal University, Xinjiang, China","institution_ids":["https://openalex.org/I1334729051"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5101528672"],"corresponding_institution_ids":["https://openalex.org/I1334729051"],"apc_list":{"value":1900,"currency":"USD","value_usd":1900},"apc_paid":{"value":1900,"currency":"USD","value_usd":1900},"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.13986792,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"6","issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10403","display_name":"Phonetics and Phonology Research","score":0.9868000149726868,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.7822999954223633},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.6563000082969666},{"id":"https://openalex.org/keywords/distillation","display_name":"Distillation","score":0.5663999915122986},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.4345000088214874},{"id":"https://openalex.org/keywords/sequential-consistency","display_name":"Sequential consistency","score":0.310699999332428}],"concepts":[{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.7822999954223633},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7594000101089478},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.6563000082969666},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5827999711036682},{"id":"https://openalex.org/C204030448","wikidata":"https://www.wikidata.org/wiki/Q101017","display_name":"Distillation","level":2,"score":0.5663999915122986},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.46869999170303345},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4634999930858612},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.4345000088214874},{"id":"https://openalex.org/C82029504","wikidata":"https://www.wikidata.org/wiki/Q4373882","display_name":"Sequential consistency","level":4,"score":0.310699999332428},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.30239999294281006},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2734000086784363},{"id":"https://openalex.org/C110157686","wikidata":"https://www.wikidata.org/wiki/Q922122","display_name":"Broadcasting (networking)","level":2,"score":0.2660999894142151}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.3389/frcmn.2025.1662788","is_oa":true,"landing_page_url":"https://doi.org/10.3389/frcmn.2025.1662788","pdf_url":"https://www.frontiersin.org/journals/communications-and-networks/articles/10.3389/frcmn.2025.1662788/pdf","source":{"id":"https://openalex.org/S4210213607","display_name":"Frontiers in Communications and Networks","issn_l":"2673-530X","issn":["2673-530X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320527","host_organization_name":"Frontiers Media","host_organization_lineage":["https://openalex.org/P4310320527"],"host_organization_lineage_names":["Frontiers Media"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Communications and Networks","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:91c5815259cd4d4eb192d80dfde816e9","is_oa":true,"landing_page_url":"https://doaj.org/article/91c5815259cd4d4eb192d80dfde816e9","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Frontiers in Communications and Networks, Vol 6 (2025)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.3389/frcmn.2025.1662788","is_oa":true,"landing_page_url":"https://doi.org/10.3389/frcmn.2025.1662788","pdf_url":"https://www.frontiersin.org/journals/communications-and-networks/articles/10.3389/frcmn.2025.1662788/pdf","source":{"id":"https://openalex.org/S4210213607","display_name":"Frontiers in Communications and Networks","issn_l":"2673-530X","issn":["2673-530X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320527","host_organization_name":"Frontiers Media","host_organization_lineage":["https://openalex.org/P4310320527"],"host_organization_lineage_names":["Frontiers Media"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Communications and Networks","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320325405","display_name":"Xinjiang Normal University","ror":"https://ror.org/00ndrvk93"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4415214233.pdf","grobid_xml":"https://content.openalex.org/works/W4415214233.grobid-xml"},"referenced_works_count":30,"referenced_works":["https://openalex.org/W1593271688","https://openalex.org/W2148154194","https://openalex.org/W2593463961","https://openalex.org/W2889282842","https://openalex.org/W2972818416","https://openalex.org/W3007328579","https://openalex.org/W3046667470","https://openalex.org/W3097777922","https://openalex.org/W3107716502","https://openalex.org/W3156828761","https://openalex.org/W3204696009","https://openalex.org/W4236887893","https://openalex.org/W4250875505","https://openalex.org/W4297841364","https://openalex.org/W4297841655","https://openalex.org/W4372270126","https://openalex.org/W4385486090","https://openalex.org/W4388017359","https://openalex.org/W4390815280","https://openalex.org/W4393407971","https://openalex.org/W4396767577","https://openalex.org/W4398162650","https://openalex.org/W4402112144","https://openalex.org/W4402703105","https://openalex.org/W4404238212","https://openalex.org/W4404782655","https://openalex.org/W4406892576","https://openalex.org/W4408355618","https://openalex.org/W4412428247","https://openalex.org/W4412719254"],"related_works":[],"abstract_inverted_index":{"Introduction":[0],"We":[1,25,38],"address":[2],"robustness":[3,104],"and":[4,51,54,59,83,109],"efficiency":[5,111],"in":[6],"Chinese":[7,46],"automatic":[8],"speech":[9,16,108],"recognition":[10],"(ASR),":[11],"focusing":[12],"on":[13,56,78,81,85],"long-form":[14,106],"broadcast":[15,50,107],"where":[17],"sentence-level":[18,32,97],"semantic":[19,98],"consistency":[20,33],"is":[21],"often":[22],"lost.":[23],"Methods":[24],"propose":[26],"a":[27,44],"Conformer-based":[28],"framework":[29],"that":[30],"integrates":[31],"with":[34,101],"pre-training":[35],"knowledge":[36],"distillation.":[37],"also":[39],"construct":[40],"CH":[41,60,86],"Broadcast":[42,61,87],"ASR,":[43,88],"domain-specific":[45],"corpus":[47],"for":[48,105,112],"the":[49],"television":[52],"domain,":[53],"evaluate":[55],"AISHELL-1,":[57,79],"AISHELL-3,":[58,82],"ASR.":[62],"Results":[63],"The":[64],"proposed":[65],"model":[66,91],"consistently":[67],"outperforms":[68],"strong":[69],"baselines":[70],"(TDNN,":[71],"DFSMN-T,":[72],"TCN-Transformer),":[73],"achieving":[74],"CER":[75],"=":[76],"3.3%":[77],"3.7%":[80],"3.9%":[84],"while":[89],"reducing":[90],"size":[92],"by":[93],"&amp;gt;10%.":[94],"Discussion":[95],"Enforcing":[96],"alignment":[99],"together":[100],"distillation":[102],"improves":[103],"enhances":[110],"real-time":[113],"deployment.":[114]},"counts_by_year":[],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-16T00:00:00"}
