{"id":"https://openalex.org/W2935886602","doi":"https://doi.org/10.1109/icassp.2019.8683722","title":"Towards Automatic Methods to Detect Errors in Transcriptions of Speech Recordings","display_name":"Towards Automatic Methods to Detect Errors in Transcriptions of Speech Recordings","publication_year":2019,"publication_date":"2019-04-17","ids":{"openalex":"https://openalex.org/W2935886602","doi":"https://doi.org/10.1109/icassp.2019.8683722","mag":"2935886602"},"language":"en","primary_location":{"id":"doi:10.1109/icassp.2019.8683722","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2019.8683722","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2019 - 2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5025903049","display_name":"Jinyi Yang","orcid":"https://orcid.org/0000-0001-5287-4242"},"institutions":[{"id":"https://openalex.org/I145311948","display_name":"Johns Hopkins University","ror":"https://ror.org/00za53h95","country_code":"US","type":"education","lineage":["https://openalex.org/I145311948"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jinyi Yang","raw_affiliation_strings":["Center for Language and Speech Processing, Johns Hopkins University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Center for Language and Speech Processing, Johns Hopkins University","institution_ids":["https://openalex.org/I145311948"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104831405","display_name":"Lucas Ondel","orcid":"https://orcid.org/0000-0003-4512-0471"},"institutions":[{"id":"https://openalex.org/I60587646","display_name":"Brno University of Technology","ror":"https://ror.org/03613d656","country_code":"CZ","type":"education","lineage":["https://openalex.org/I60587646"]}],"countries":["CZ"],"is_corresponding":false,"raw_author_name":"Lucas Ondel","raw_affiliation_strings":["FIT, IT4I Centre of Excellence, Brno University of Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"FIT, IT4I Centre of Excellence, Brno University of Technology","institution_ids":["https://openalex.org/I60587646"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028956985","display_name":"Vimal Manohar","orcid":null},"institutions":[{"id":"https://openalex.org/I145311948","display_name":"Johns Hopkins University","ror":"https://ror.org/00za53h95","country_code":"US","type":"education","lineage":["https://openalex.org/I145311948"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Vimal Manohar","raw_affiliation_strings":["Center for Language and Speech Processing, Johns Hopkins University","Human Language Technology Center of Excellence, Johns Hopkins University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Center for Language and Speech Processing, Johns Hopkins University","institution_ids":["https://openalex.org/I145311948"]},{"raw_affiliation_string":"Human Language Technology Center of Excellence, Johns Hopkins University","institution_ids":["https://openalex.org/I145311948"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5042260050","display_name":"Hynek He\u0159mansk\u00fd","orcid":"https://orcid.org/0000-0001-8032-4811"},"institutions":[{"id":"https://openalex.org/I145311948","display_name":"Johns Hopkins University","ror":"https://ror.org/00za53h95","country_code":"US","type":"education","lineage":["https://openalex.org/I145311948"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hynek Hermansky","raw_affiliation_strings":["Center for Language and Speech Processing, Johns Hopkins University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Center for Language and Speech Processing, Johns Hopkins University","institution_ids":["https://openalex.org/I145311948"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.2892,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.65060684,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"3747","last_page":"3751"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7848691940307617},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.7726429104804993},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7614184617996216},{"id":"https://openalex.org/keywords/timit","display_name":"TIMIT","score":0.7561351656913757},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.6854537129402161},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.5418155789375305},{"id":"https://openalex.org/keywords/levenshtein-distance","display_name":"Levenshtein distance","score":0.5286120772361755},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4988112449645996},{"id":"https://openalex.org/keywords/transcription","display_name":"Transcription (linguistics)","score":0.48530906438827515},{"id":"https://openalex.org/keywords/bayesian-probability","display_name":"Bayesian probability","score":0.4335680603981018}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7848691940307617},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.7726429104804993},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7614184617996216},{"id":"https://openalex.org/C2778724510","wikidata":"https://www.wikidata.org/wiki/Q7670405","display_name":"TIMIT","level":3,"score":0.7561351656913757},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.6854537129402161},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.5418155789375305},{"id":"https://openalex.org/C2777515626","wikidata":"https://www.wikidata.org/wiki/Q496939","display_name":"Levenshtein distance","level":2,"score":0.5286120772361755},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4988112449645996},{"id":"https://openalex.org/C179926584","wikidata":"https://www.wikidata.org/wiki/Q207714","display_name":"Transcription (linguistics)","level":2,"score":0.48530906438827515},{"id":"https://openalex.org/C107673813","wikidata":"https://www.wikidata.org/wiki/Q812534","display_name":"Bayesian probability","level":2,"score":0.4335680603981018},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp.2019.8683722","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2019.8683722","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2019 - 2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.8199999928474426,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W1486694252","https://openalex.org/W1524333225","https://openalex.org/W1975113979","https://openalex.org/W2024490156","https://openalex.org/W2126505708","https://openalex.org/W2166851633","https://openalex.org/W2347098582","https://openalex.org/W2464234964","https://openalex.org/W2510616059","https://openalex.org/W2767017426","https://openalex.org/W2888911345","https://openalex.org/W2962693497","https://openalex.org/W2962695963","https://openalex.org/W2964147121","https://openalex.org/W3127686677","https://openalex.org/W4294562888","https://openalex.org/W6629179373","https://openalex.org/W6631362777","https://openalex.org/W6684578138","https://openalex.org/W6719357382","https://openalex.org/W6789826613"],"related_works":["https://openalex.org/W3134920593","https://openalex.org/W2143247386","https://openalex.org/W1990589093","https://openalex.org/W2994894110","https://openalex.org/W4377862891","https://openalex.org/W2501000458","https://openalex.org/W3101868899","https://openalex.org/W1578749070","https://openalex.org/W2146842779","https://openalex.org/W2005708641"],"abstract_inverted_index":{"This":[0],"work":[1],"explores":[2],"different":[3],"methods":[4,115,152],"to":[5,58],"detect":[6],"errors":[7,118,155],"in":[8,119,156],"transcriptions":[9,19,32],"of":[10,23,116,153],"speech":[11,18],"recordings.":[12],"We":[13,112],"artificially":[14],"corrupt":[15],"well":[16],"transcribed":[17],"with":[20,79,89,130],"three":[21],"types":[22],"errors:":[24],"substitution,":[25],"insertion":[26],"and":[27,33,50,73,106,160],"deletion":[28],"on":[29,93,100,141],"TIMIT":[30,121],"phonemic":[31],"WSJ":[34,158],"word":[35],"transcriptions.":[36],"First,":[37],"we":[38,64,83,148],"use":[39],"Bayesian":[40,68],"model":[41,128,132],"selection":[42,129],"method":[43],"by":[44],"comparing":[45],"the":[46,114,123,142,151,161],"log-likelihoods":[47],"from":[48,109],"alignment":[49],"phone":[51],"recognizer,":[52],"a":[53,74,80,85],"final":[54],"score":[55],"is":[56,98],"computed":[57],"make":[59],"decision.":[60],"In":[61],"this":[62],"method,":[63],"consider":[65],"two":[66],"models,":[67],"Hidden":[69],"Markov":[70],"Model":[71],"(HMM)":[72],"Variational":[75],"Auto-Encoder":[76],"(VAE)":[77],"combined":[78],"HMM.":[81],"Alternately,":[82],"build":[84],"biased":[86,134,165],"ASR":[87],"system":[88],"language":[90],"models":[91],"trained":[92],"individual":[94],"transcriptions,":[95,159],"detection":[96],"decision":[97],"based":[99],"Levenshtein":[101],"distance":[102],"(LD)":[103],"between":[104],"transcription":[105],"oracle":[107],"path":[108],"decoded":[110],"lattice.":[111],"evaluate":[113,150],"detecting":[117,154],"corrupted":[120,157],"transcription,":[122],"best":[124,162],"result":[125,163],"(either":[126],"using":[127],"VAE":[131],"or":[133],"ASR)":[135,166],"achieves":[136,167],"7%":[137],"equal":[138,169],"error":[139,170],"rate":[140],"Detection":[143],"Error":[144],"Tradeoff":[145],"(DET)":[146],"curve;":[147],"also":[149],"(using":[164],"3%":[168],"rate.":[171]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
