{"id":"https://openalex.org/W4292595872","doi":"https://doi.org/10.1145/3557894","title":"Improving Readability for Automatic Speech Recognition Transcription","display_name":"Improving Readability for Automatic Speech Recognition Transcription","publication_year":2022,"publication_date":"2022-08-22","ids":{"openalex":"https://openalex.org/W4292595872","doi":"https://doi.org/10.1145/3557894"},"language":"en","primary_location":{"id":"doi:10.1145/3557894","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3557894","pdf_url":null,"source":{"id":"https://openalex.org/S4306421405","display_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","issn_l":"2375-4699","issn":["2375-4699","2375-4702"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5078591720","display_name":"Junwei Liao","orcid":"https://orcid.org/0000-0001-7321-7583"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Junwei Liao","raw_affiliation_strings":["University of Electronic Science and Technology of China, Chengdu, China"],"raw_orcid":"https://orcid.org/0000-0001-7321-7583","affiliations":[{"raw_affiliation_string":"University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026088950","display_name":"\u015eefik Emre Eskimez","orcid":"https://orcid.org/0000-0001-6259-5925"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]},{"id":"https://openalex.org/I4210147207","display_name":"The Dialogue","ror":"https://ror.org/04d3hex36","country_code":"US","type":"other","lineage":["https://openalex.org/I4210147207"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sefik Eskimez","raw_affiliation_strings":["Microsoft Speech and Dialogue Research Group, USA"],"raw_orcid":"https://orcid.org/0000-0001-6259-5925","affiliations":[{"raw_affiliation_string":"Microsoft Speech and Dialogue Research Group, USA","institution_ids":["https://openalex.org/I4210147207","https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102933609","display_name":"Liyang Lu","orcid":"https://orcid.org/0000-0001-5003-7459"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]},{"id":"https://openalex.org/I4210147207","display_name":"The Dialogue","ror":"https://ror.org/04d3hex36","country_code":"US","type":"other","lineage":["https://openalex.org/I4210147207"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Liyang Lu","raw_affiliation_strings":["Microsoft Speech and Dialogue Research Group, USA"],"raw_orcid":"https://orcid.org/0000-0001-5003-7459","affiliations":[{"raw_affiliation_string":"Microsoft Speech and Dialogue Research Group, USA","institution_ids":["https://openalex.org/I4210147207","https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101597408","display_name":"Yu Shi","orcid":"https://orcid.org/0000-0003-1872-3429"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]},{"id":"https://openalex.org/I4210147207","display_name":"The Dialogue","ror":"https://ror.org/04d3hex36","country_code":"US","type":"other","lineage":["https://openalex.org/I4210147207"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yu Shi","raw_affiliation_strings":["Microsoft Speech and Dialogue Research Group, USA"],"raw_orcid":"https://orcid.org/0000-0003-1872-3429","affiliations":[{"raw_affiliation_string":"Microsoft Speech and Dialogue Research Group, USA","institution_ids":["https://openalex.org/I4210147207","https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101640051","display_name":"Ming Gong","orcid":"https://orcid.org/0000-0001-6140-7187"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ming Gong","raw_affiliation_strings":["Microsoft STCA NLP Group, China"],"raw_orcid":"https://orcid.org/0000-0001-6140-7187","affiliations":[{"raw_affiliation_string":"Microsoft STCA NLP Group, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077262995","display_name":"Linjun Shou","orcid":"https://orcid.org/0000-0002-1050-7708"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Linjun Shou","raw_affiliation_strings":["Microsoft STCA NLP Group, China"],"raw_orcid":"https://orcid.org/0000-0002-1050-7708","affiliations":[{"raw_affiliation_string":"Microsoft STCA NLP Group, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021975286","display_name":"Hong Qu","orcid":"https://orcid.org/0000-0001-6114-3441"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hong Qu","raw_affiliation_strings":["University of Electronic Science and Technology of China, Chengdu, China"],"raw_orcid":"https://orcid.org/0000-0001-6114-3441","affiliations":[{"raw_affiliation_string":"University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5089195158","display_name":"Michael Zeng","orcid":"https://orcid.org/0000-0001-5302-5883"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]},{"id":"https://openalex.org/I4210147207","display_name":"The Dialogue","ror":"https://ror.org/04d3hex36","country_code":"US","type":"other","lineage":["https://openalex.org/I4210147207"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Michael Zeng","raw_affiliation_strings":["Microsoft Speech and Dialogue Research Group, USA"],"raw_orcid":"https://orcid.org/0000-0001-5302-5883","affiliations":[{"raw_affiliation_string":"Microsoft Speech and Dialogue Research Group, USA","institution_ids":["https://openalex.org/I4210147207","https://openalex.org/I1290206253"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5078591720"],"corresponding_institution_ids":["https://openalex.org/I150229711"],"apc_list":null,"apc_paid":null,"fwci":4.1616,"has_fulltext":false,"cited_by_count":39,"citation_normalized_percentile":{"value":0.94674254,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":100},"biblio":{"volume":"22","issue":"5","first_page":"1","last_page":"23"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8454749584197998},{"id":"https://openalex.org/keywords/readability","display_name":"Readability","score":0.6770375370979309},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6721805930137634},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.6684295535087585},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.6269890666007996},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5898575782775879},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5722928047180176},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.5592333674430847},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5268260836601257},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5251764059066772},{"id":"https://openalex.org/keywords/sequence-labeling","display_name":"Sequence labeling","score":0.5013139247894287},{"id":"https://openalex.org/keywords/transcription","display_name":"Transcription (linguistics)","score":0.4434627294540405},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.41254401206970215},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3275611996650696}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8454749584197998},{"id":"https://openalex.org/C2778143727","wikidata":"https://www.wikidata.org/wiki/Q1820650","display_name":"Readability","level":2,"score":0.6770375370979309},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6721805930137634},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.6684295535087585},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.6269890666007996},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5898575782775879},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5722928047180176},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.5592333674430847},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5268260836601257},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5251764059066772},{"id":"https://openalex.org/C35639132","wikidata":"https://www.wikidata.org/wiki/Q7452468","display_name":"Sequence labeling","level":3,"score":0.5013139247894287},{"id":"https://openalex.org/C179926584","wikidata":"https://www.wikidata.org/wiki/Q207714","display_name":"Transcription (linguistics)","level":2,"score":0.4434627294540405},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.41254401206970215},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3275611996650696},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C111368507","wikidata":"https://www.wikidata.org/wiki/Q43518","display_name":"Oceanography","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3557894","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3557894","pdf_url":null,"source":{"id":"https://openalex.org/S4306421405","display_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","issn_l":"2375-4699","issn":["2375-4699","2375-4702"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.7599999904632568}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":67,"referenced_works":["https://openalex.org/W10801008","https://openalex.org/W157500369","https://openalex.org/W1498990157","https://openalex.org/W1524281572","https://openalex.org/W1999762143","https://openalex.org/W2003123121","https://openalex.org/W2040380014","https://openalex.org/W2101105183","https://openalex.org/W2101446498","https://openalex.org/W2123343069","https://openalex.org/W2133012565","https://openalex.org/W2152638814","https://openalex.org/W2169187092","https://openalex.org/W2183341477","https://openalex.org/W2315316408","https://openalex.org/W2396384435","https://openalex.org/W2483327705","https://openalex.org/W2507699225","https://openalex.org/W2512924740","https://openalex.org/W2514996388","https://openalex.org/W2525778437","https://openalex.org/W2560674852","https://openalex.org/W2594706607","https://openalex.org/W2606429533","https://openalex.org/W2608239929","https://openalex.org/W2620507731","https://openalex.org/W2741609678","https://openalex.org/W2756954690","https://openalex.org/W2758074402","https://openalex.org/W2785918467","https://openalex.org/W2810035278","https://openalex.org/W2899663614","https://openalex.org/W2903853691","https://openalex.org/W2916979304","https://openalex.org/W2916997151","https://openalex.org/W2948335087","https://openalex.org/W2962784628","https://openalex.org/W2962805889","https://openalex.org/W2963026768","https://openalex.org/W2963261349","https://openalex.org/W2963341956","https://openalex.org/W2964243274","https://openalex.org/W2965373594","https://openalex.org/W2970076840","https://openalex.org/W2970521905","https://openalex.org/W2970868759","https://openalex.org/W2972529197","https://openalex.org/W2972799129","https://openalex.org/W2981852735","https://openalex.org/W3015752032","https://openalex.org/W3034999214","https://openalex.org/W3041181542","https://openalex.org/W3105804405","https://openalex.org/W3165574448","https://openalex.org/W3196167364","https://openalex.org/W4212947519","https://openalex.org/W4221167110","https://openalex.org/W4224308101","https://openalex.org/W4288089799","https://openalex.org/W4289121820","https://openalex.org/W4292779060","https://openalex.org/W4302078636","https://openalex.org/W4310895557","https://openalex.org/W4389521028","https://openalex.org/W6755977528","https://openalex.org/W6778883912","https://openalex.org/W6810081322"],"related_works":["https://openalex.org/W1964661231","https://openalex.org/W4254960163","https://openalex.org/W3110264473","https://openalex.org/W2032810564","https://openalex.org/W2370831213","https://openalex.org/W2535098683","https://openalex.org/W2437596814","https://openalex.org/W2085373452","https://openalex.org/W4205820553","https://openalex.org/W2251441308"],"abstract_inverted_index":{"Modern":[0],"Automatic":[1],"Speech":[2],"Recognition":[3],"(ASR)":[4],"systems":[5,46],"can":[6,21],"achieve":[7],"high":[8],"performance":[9,50,165,200],"in":[10,35,206],"terms":[11,207],"of":[12,51,57,106,128,208,258,266],"recognition":[13],"accuracy.":[14],"However,":[15],"a":[16,65,77,93,133,137,202],"perfectly":[17],"accurate":[18],"transcript":[19],"still":[20],"be":[22],"challenging":[23],"to":[24,27,86,124,135,162,262],"read":[25],"due":[26],"grammatical":[28,149],"errors,":[29],"disfluency,":[30],"and":[31,44,54,73,98,120,197,228,239,251],"other":[32],"noises":[33],"common":[34],"spoken":[36],"communication.":[37],"These":[38],"readable":[39,94],"issues":[40],"introduced":[41],"by":[42,143,237],"speakers":[43],"ASR":[45,68,90,267],"will":[47],"impair":[48],"the":[49,55,88,103,111,115,126,140,145,167,180,184,190,194,217,224,234,256,259,264],"downstream":[52,99],"tasks":[53,100,161],"understanding":[56],"human":[58,249],"readers.":[59],"In":[60],"this":[61],"work,":[62],"we":[63,131,153,171,188],"present":[64],"task":[66,84,113,142],"called":[67],"post-processing":[69],"for":[70,96,139,148,183],"readability":[71,265],"(APR)":[72],"formulate":[74],"it":[75],"as":[76,179],"sequence-to-sequence":[78],"text":[79,95],"generation":[80],"problem.":[81],"The":[82,248],"APR":[83,112,141,168,185],"aims":[85],"transform":[87],"noisy":[89],"output":[91],"into":[92],"humans":[97],"while":[101],"maintaining":[102],"semantic":[104],"meaning":[105],"speakers.":[107],"We":[108],"further":[109,254],"study":[110,253],"from":[114,159],"benchmark":[116],"dataset,":[117],"evaluation":[118,210,250],"metrics,":[119],"baseline":[121,181,191,219],"models:":[122],"First,":[123],"address":[125],"lack":[127],"task-specific":[129],"data,":[130],"propose":[132],"method":[134,205,236],"construct":[136],"dataset":[138,196],"using":[144],"data":[146],"collected":[147],"error":[150],"correction.":[151],"Second,":[152],"utilize":[154],"metrics":[155],"adapted":[156,176,230],"or":[157,175],"borrowed":[158],"similar":[160],"evaluate":[163],"model":[164,232,261],"on":[166,193,243],"task.":[169,186],"Lastly,":[170],"use":[172],"several":[173],"typical":[174],"pre-trained":[177],"models":[178,182,192,220],"Furthermore,":[187],"fine-tune":[189],"constructed":[195],"compare":[198],"their":[199],"with":[201],"traditional":[203,225],"pipeline":[204,226,235],"proposed":[209,260],"metrics.":[211],"Experimental":[212],"results":[213],"show":[214],"that":[215],"all":[216],"fine-tuned":[218],"perform":[221],"better":[222],"than":[223],"method,":[227],"our":[229],"RoBERTa":[231],"outperforms":[233],"4.95":[238],"6.63":[240],"BLEU":[241],"points":[242],"two":[244],"test":[245],"sets,":[246],"respectively.":[247],"case":[252],"reveal":[255],"ability":[257],"improve":[263],"transcripts.":[268]},"counts_by_year":[{"year":2026,"cited_by_count":5},{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":12},{"year":2023,"cited_by_count":10},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":1}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
