{"id":"https://openalex.org/W2111431431","doi":"https://doi.org/10.1109/asru.2009.5373365","title":"Automatic punctuation generation for speech","display_name":"Automatic punctuation generation for speech","publication_year":2009,"publication_date":"2009-12-01","ids":{"openalex":"https://openalex.org/W2111431431","doi":"https://doi.org/10.1109/asru.2009.5373365","mag":"2111431431"},"language":"en","primary_location":{"id":"doi:10.1109/asru.2009.5373365","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru.2009.5373365","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2009 IEEE Workshop on Automatic Speech Recognition &amp; Understanding","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5090494863","display_name":"Wenzhu Shen","orcid":null},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]},{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN","US"],"is_corresponding":true,"raw_author_name":"Wenzhu Shen","raw_affiliation_strings":["Microsoft Research Asia, Beijing Sigma Center, Beijing, China","Microsoft Research Asia, 5F Beijing Sigma Center, 49 Zhichun Rd., 100080 China"],"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia, Beijing Sigma Center, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]},{"raw_affiliation_string":"Microsoft Research Asia, 5F Beijing Sigma Center, 49 Zhichun Rd., 100080 China","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016239612","display_name":"Roger Peng Yu","orcid":null},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN","US"],"is_corresponding":false,"raw_author_name":"Roger Peng Yu","raw_affiliation_strings":["Speech Recognition Laboratory For Information Science and Technology, Department of Electronic Engineering, Tsinghua University, Beijing, China","Microsoft Research Asia, 5F Beijing Sigma Center, 49 Zhichun Rd., 100080 China"],"affiliations":[{"raw_affiliation_string":"Speech Recognition Laboratory For Information Science and Technology, Department of Electronic Engineering, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"Microsoft Research Asia, 5F Beijing Sigma Center, 49 Zhichun Rd., 100080 China","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072932051","display_name":"Frank Seide","orcid":null},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]},{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["CN","US"],"is_corresponding":false,"raw_author_name":"Frank Seide","raw_affiliation_strings":["Microsoft Research Asia, Beijing Sigma Center, Beijing, China","Microsoft Research Asia, 5F Beijing Sigma Center, 49 Zhichun Rd., 100080 China"],"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia, Beijing Sigma Center, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]},{"raw_affiliation_string":"Microsoft Research Asia, 5F Beijing Sigma Center, 49 Zhichun Rd., 100080 China","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5014958566","display_name":"Ji Wu","orcid":"https://orcid.org/0000-0001-6170-726X"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ji Wu","raw_affiliation_strings":["Speech Recognition Laboratory For Information Science and Technology, Department of Electronic Engineering, Tsinghua University, Beijing, China","Speech Recognition Laboratory For Information Science and Technology, Department of Electronic Engineering, Tsinghua University, 100084 Beijing, China"],"affiliations":[{"raw_affiliation_string":"Speech Recognition Laboratory For Information Science and Technology, Department of Electronic Engineering, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"Speech Recognition Laboratory For Information Science and Technology, Department of Electronic Engineering, Tsinghua University, 100084 Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5090494863"],"corresponding_institution_ids":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"],"apc_list":null,"apc_paid":null,"fwci":0.8724,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.80472114,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"586","last_page":"589"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/punctuation","display_name":"Punctuation","score":0.9278568625450134},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8224244713783264},{"id":"https://openalex.org/keywords/trigram","display_name":"Trigram","score":0.7043402791023254},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6689883470535278},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.58589106798172},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5713822245597839},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5534015893936157},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.5346713662147522},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4915364682674408},{"id":"https://openalex.org/keywords/acoustic-model","display_name":"Acoustic model","score":0.4802561104297638},{"id":"https://openalex.org/keywords/transcription","display_name":"Transcription (linguistics)","score":0.468850702047348},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.2954593896865845},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.10913333296775818}],"concepts":[{"id":"https://openalex.org/C540372491","wikidata":"https://www.wikidata.org/wiki/Q82622","display_name":"Punctuation","level":2,"score":0.9278568625450134},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8224244713783264},{"id":"https://openalex.org/C137546455","wikidata":"https://www.wikidata.org/wiki/Q3213474","display_name":"Trigram","level":2,"score":0.7043402791023254},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6689883470535278},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.58589106798172},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5713822245597839},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5534015893936157},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.5346713662147522},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4915364682674408},{"id":"https://openalex.org/C155635449","wikidata":"https://www.wikidata.org/wiki/Q4674699","display_name":"Acoustic model","level":3,"score":0.4802561104297638},{"id":"https://openalex.org/C179926584","wikidata":"https://www.wikidata.org/wiki/Q207714","display_name":"Transcription (linguistics)","level":2,"score":0.468850702047348},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.2954593896865845},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.10913333296775818},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/asru.2009.5373365","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru.2009.5373365","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2009 IEEE Workshop on Automatic Speech Recognition &amp; Understanding","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6700000166893005,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":15,"referenced_works":["https://openalex.org/W32283220","https://openalex.org/W94670513","https://openalex.org/W171402391","https://openalex.org/W192736094","https://openalex.org/W1560013842","https://openalex.org/W1587490638","https://openalex.org/W2085621450","https://openalex.org/W2113851376","https://openalex.org/W2169187092","https://openalex.org/W2175959952","https://openalex.org/W6603793917","https://openalex.org/W6607019473","https://openalex.org/W6607863627","https://openalex.org/W6635223454","https://openalex.org/W6690885502"],"related_works":["https://openalex.org/W2105076537","https://openalex.org/W2020757772","https://openalex.org/W2072223048","https://openalex.org/W2041167939","https://openalex.org/W2250909759","https://openalex.org/W1975573722","https://openalex.org/W1620568205","https://openalex.org/W2002221802","https://openalex.org/W1823613926","https://openalex.org/W2551914602"],"abstract_inverted_index":{"Automatic":[0],"generation":[1],"of":[2,38,100,136],"punctuation":[3,23,73],"is":[4,108,116],"an":[5,39,62,134],"essential":[6],"feature":[7],"for":[8,21,43,54,103,140,150],"many":[9],"speech-to-text":[10],"transcription":[11],"tasks.":[12],"This":[13],"paper":[14],"describes":[15],"a":[16,50,67,70],"maximum":[17],"a-posteriori":[18],"(MAP)":[19],"approach":[20],"inserting":[22],"marks":[24],"into":[25,77],"raw":[26],"word":[27],"sequences":[28],"obtained":[29],"from":[30],"automatic":[31],"speech":[32],"recognition":[33],"(ASR).":[34],"The":[35,57,75],"system":[36],"consists":[37],"\u00bfacoustic":[40],"model\u00bf":[41,52],"(AM)":[42],"prosodic":[44,128],"features":[45],"(actually":[46],"pause":[47],"duration)":[48],"and":[49,66,69,79,119,129,138,142,148],"\u00bflanguage":[51],"(LM)":[53],"text-only":[55],"features.":[56],"LM":[58,93,115],"combines":[59],"three":[60],"components:":[61],"MLP-based":[63],"trigger-word":[64,114],"model":[65,81],"forward":[68],"backward":[71],"trigram":[72],"predictor.":[74],"separation":[76],"acoustic":[78,106],"language":[80],"allows":[82],"to":[83,94],"learn":[84],"these":[85],"models":[86],"on":[87,97,145,152],"different":[88],"corpora,":[89],"especially":[90],"allowing":[91],"the":[92,113],"be":[95,123],"trained":[96],"large":[98],"amounts":[99],"data":[101],"(text)":[102],"which":[104],"no":[105],"information":[107],"available.":[109],"We":[110,132],"find":[111],"that":[112],"very":[117],"useful,":[118],"further":[120],"improvement":[121],"can":[122],"achieved":[124],"when":[125],"combining":[126],"both":[127],"lexical":[130],"information.":[131],"achieve":[133],"F-measure":[135],"81.0%":[137],"56.5%":[139],"voicemails":[141,151],"podcasts,":[143],"respectively,":[144],"reference":[146],"transcripts,":[147],"69.6%":[149],"ASR":[153],"transcripts.":[154]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2017,"cited_by_count":1},{"year":2014,"cited_by_count":1},{"year":2013,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
