{"id":"https://openalex.org/W4224925075","doi":"https://doi.org/10.1109/icassp43922.2022.9746821","title":"Investigating Sequence-Level Normalisation For CTC-Like End-to-End ASR","display_name":"Investigating Sequence-Level Normalisation For CTC-Like End-to-End ASR","publication_year":2022,"publication_date":"2022-04-27","ids":{"openalex":"https://openalex.org/W4224925075","doi":"https://doi.org/10.1109/icassp43922.2022.9746821"},"language":"en","primary_location":{"id":"doi:10.1109/icassp43922.2022.9746821","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9746821","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://www.research.ed.ac.uk/en/publications/f3e15774-9c16-4e31-a6e3-84947d954280","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5069625526","display_name":"Zeyu Zhao","orcid":"https://orcid.org/0000-0002-4070-2694"},"institutions":[{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Zeyu Zhao","raw_affiliation_strings":["University of Edinburgh,Centre for Speech Technology Research,UK","Centre for Speech Technology Research, University of Edinburgh, UK"],"affiliations":[{"raw_affiliation_string":"University of Edinburgh,Centre for Speech Technology Research,UK","institution_ids":["https://openalex.org/I98677209"]},{"raw_affiliation_string":"Centre for Speech Technology Research, University of Edinburgh, UK","institution_ids":["https://openalex.org/I98677209"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102911387","display_name":"Peter Bell","orcid":"https://orcid.org/0000-0002-9597-9615"},"institutions":[{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Peter Bell","raw_affiliation_strings":["University of Edinburgh,Centre for Speech Technology Research,UK","Centre for Speech Technology Research, University of Edinburgh, UK"],"affiliations":[{"raw_affiliation_string":"University of Edinburgh,Centre for Speech Technology Research,UK","institution_ids":["https://openalex.org/I98677209"]},{"raw_affiliation_string":"Centre for Speech Technology Research, University of Edinburgh, UK","institution_ids":["https://openalex.org/I98677209"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5069625526"],"corresponding_institution_ids":["https://openalex.org/I98677209"],"apc_list":null,"apc_paid":null,"fwci":0.7276,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.69731739,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"7792","last_page":"7796"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.7304881811141968},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7174074053764343},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.618593692779541},{"id":"https://openalex.org/keywords/connectionism","display_name":"Connectionism","score":0.5985227823257446},{"id":"https://openalex.org/keywords/end-to-end-principle","display_name":"End-to-end principle","score":0.5524553060531616},{"id":"https://openalex.org/keywords/topology","display_name":"Topology (electrical circuits)","score":0.5471557378768921},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.5100911259651184},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.45482495427131653},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.4498044550418854},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.43814465403556824},{"id":"https://openalex.org/keywords/network-topology","display_name":"Network topology","score":0.4303407073020935},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.36539342999458313},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.35909897089004517},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.24441230297088623},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.14365071058273315},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.134733647108078}],"concepts":[{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.7304881811141968},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7174074053764343},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.618593692779541},{"id":"https://openalex.org/C8521452","wikidata":"https://www.wikidata.org/wiki/Q203790","display_name":"Connectionism","level":3,"score":0.5985227823257446},{"id":"https://openalex.org/C74296488","wikidata":"https://www.wikidata.org/wiki/Q2527392","display_name":"End-to-end principle","level":2,"score":0.5524553060531616},{"id":"https://openalex.org/C184720557","wikidata":"https://www.wikidata.org/wiki/Q7825049","display_name":"Topology (electrical circuits)","level":2,"score":0.5471557378768921},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.5100911259651184},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.45482495427131653},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.4498044550418854},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.43814465403556824},{"id":"https://openalex.org/C199845137","wikidata":"https://www.wikidata.org/wiki/Q145490","display_name":"Network topology","level":2,"score":0.4303407073020935},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.36539342999458313},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.35909897089004517},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.24441230297088623},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.14365071058273315},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.134733647108078},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/icassp43922.2022.9746821","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9746821","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},{"id":"pmh:oai:pure.ed.ac.uk:openaire/f3e15774-9c16-4e31-a6e3-84947d954280","is_oa":true,"landing_page_url":"https://www.research.ed.ac.uk/en/publications/f3e15774-9c16-4e31-a6e3-84947d954280","pdf_url":null,"source":{"id":"https://openalex.org/S4406922455","display_name":"Edinburgh Research Explorer","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Zhao, Z & Bell, P 2022, Investigating Sequence-Level Normalisation for CTC-Like End-To-End ASR. in Proceedings of 2022 IEEE International Conference on Acoustics, Speech and Signal Processing. International Conference on Acoustics, Speech, and Signal Processing (ICASSP), Institute of Electrical and Electronics Engineers, pp. 7792-7796, 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) , 7/05/22. https://doi.org/10.1109/ICASSP43922.2022.9746821","raw_type":"contributionToPeriodical"},{"id":"pmh:oai:pure.ed.ac.uk:publications/f3e15774-9c16-4e31-a6e3-84947d954280","is_oa":false,"landing_page_url":"http://hdl.handle.net/20.500.11820/f3e15774-9c16-4e31-a6e3-84947d954280","pdf_url":null,"source":{"id":"https://openalex.org/S4406922455","display_name":"Edinburgh Research Explorer","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":""}],"best_oa_location":{"id":"pmh:oai:pure.ed.ac.uk:openaire/f3e15774-9c16-4e31-a6e3-84947d954280","is_oa":true,"landing_page_url":"https://www.research.ed.ac.uk/en/publications/f3e15774-9c16-4e31-a6e3-84947d954280","pdf_url":null,"source":{"id":"https://openalex.org/S4406922455","display_name":"Edinburgh Research Explorer","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Zhao, Z & Bell, P 2022, Investigating Sequence-Level Normalisation for CTC-Like End-To-End ASR. in Proceedings of 2022 IEEE International Conference on Acoustics, Speech and Signal Processing. International Conference on Acoustics, Speech, and Signal Processing (ICASSP), Institute of Electrical and Electronics Engineers, pp. 7792-7796, 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) , 7/05/22. https://doi.org/10.1109/ICASSP43922.2022.9746821","raw_type":"contributionToPeriodical"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.5899999737739563}],"awards":[],"funders":[{"id":"https://openalex.org/F4320322183","display_name":"Huawei Technologies","ror":"https://ror.org/00cmhce21"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W1524333225","https://openalex.org/W1710082047","https://openalex.org/W1828163288","https://openalex.org/W2064675550","https://openalex.org/W2105594594","https://openalex.org/W2125838338","https://openalex.org/W2131342762","https://openalex.org/W2160815625","https://openalex.org/W2327501763","https://openalex.org/W2514741789","https://openalex.org/W2697044473","https://openalex.org/W2746440465","https://openalex.org/W2889282842","https://openalex.org/W2962780374","https://openalex.org/W2963250244","https://openalex.org/W3016120074","https://openalex.org/W3097882114","https://openalex.org/W3162249256","https://openalex.org/W4205130185","https://openalex.org/W4295312788","https://openalex.org/W6631362777","https://openalex.org/W6638749077","https://openalex.org/W6766978945","https://openalex.org/W7027429494"],"related_works":["https://openalex.org/W4205841273","https://openalex.org/W4205525690","https://openalex.org/W1732468982","https://openalex.org/W1761388607","https://openalex.org/W1997922073","https://openalex.org/W350032239","https://openalex.org/W2604685715","https://openalex.org/W2412160900","https://openalex.org/W2136453575","https://openalex.org/W1596913645"],"abstract_inverted_index":{"End-to-end":[0],"Automatic":[1],"Speech":[2],"Recognition":[3],"(E2E":[4],"ASR)":[5],"significantly":[6],"simplifies":[7],"the":[8,22,74,81,85,97,106,109,129,133,136,143,149,166],"training":[9],"process":[10],"of":[11,21,76,84,108,132,145],"an":[12],"ASR":[13,28,70],"model.":[14],"Connectionist":[15],"Temporal":[16],"Classification":[17],"(CTC)":[18],"is":[19,37,102,159,168],"one":[20],"most":[23],"popular":[24],"methods":[25],"for":[26,40,68,170],"E2E":[27,69],"training.":[29,172],"Implicitly,":[30],"CTC":[31,89],"has":[32],"a":[33,64,99],"unique":[34],"topology":[35,75],"which":[36,104],"very":[38],"useful":[39],"sequence":[41,151,171],"modelling.":[42],"However,":[43],"we":[44,52,62,95,119],"find":[45],"that":[46,80,157,165],"by":[47,72],"changing":[48],"to":[49,114,127,140],"another":[50],"topology,":[51,98],"can":[53,90],"make":[54],"it":[55],"even":[56],"more":[57,160],"effective.":[58],"In":[59,125],"this":[60],"paper,":[61],"propose":[63],"new":[65],"CTC-like":[66],"method,":[67],"training,":[71],"modifying":[73],"original":[77],"CTC,":[78,163],"so":[79],"well-known":[82],"abuse":[83],"blank":[86],"label":[87],"in":[88],"be":[91],"resolved":[92],"theoretically.":[93],"As":[94],"change":[96],"normalisation":[100,137,167],"term":[101],"necessary,":[103],"makes":[105],"form":[107],"final":[110],"loss":[111],"function":[112],"similar":[113],"Maximum":[115],"Mutual":[116],"Information":[117],"(MMI);":[118],"hence":[120],"name":[121],"our":[122],"method":[123],"MMI-CTC.":[124],"addition":[126],"maximising":[128],"posterior":[130],"probability":[131,144],"target":[134],"sequence,":[135],"enables":[138],"models":[139],"explicitly":[141],"minimise":[142],"competing":[146],"hypothesis":[147],"at":[148],"word":[150],"level.":[152],"Our":[153],"experimental":[154],"results":[155],"show":[156],"MMI-CTC":[158],"efficient":[161],"than":[162],"and":[164],"essential":[169]},"counts_by_year":[{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
