{"id":"https://openalex.org/W1519434885","doi":"https://doi.org/10.1109/mmsp.2002.1203294","title":"Recent progress in spontaneous speech recognition and understanding","display_name":"Recent progress in spontaneous speech recognition and understanding","publication_year":2004,"publication_date":"2004-01-24","ids":{"openalex":"https://openalex.org/W1519434885","doi":"https://doi.org/10.1109/mmsp.2002.1203294","mag":"1519434885"},"language":"en","primary_location":{"id":"doi:10.1109/mmsp.2002.1203294","is_oa":false,"landing_page_url":"https://doi.org/10.1109/mmsp.2002.1203294","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2002 IEEE Workshop on Multimedia Signal Processing.","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5009532108","display_name":"Sadaoki Furui","orcid":null},"institutions":[{"id":"https://openalex.org/I114531698","display_name":"Tokyo Institute of Technology","ror":"https://ror.org/0112mx960","country_code":"JP","type":"education","lineage":["https://openalex.org/I114531698"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"S. Furui","raw_affiliation_strings":["Department of Computer Science, Tokyo Institute of Technology, Meguro, Tokyo, Japan","Department of Computer Science, Tokyo Institute of Technology, Japan"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Tokyo Institute of Technology, Meguro, Tokyo, Japan","institution_ids":["https://openalex.org/I114531698"]},{"raw_affiliation_string":"Department of Computer Science, Tokyo Institute of Technology, Japan","institution_ids":["https://openalex.org/I114531698"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5009532108"],"corresponding_institution_ids":["https://openalex.org/I114531698"],"apc_list":null,"apc_paid":null,"fwci":1.3491,"has_fulltext":false,"cited_by_count":12,"citation_normalized_percentile":{"value":0.83624454,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":"1","issue":null,"first_page":"253","last_page":"258"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7655472755432129},{"id":"https://openalex.org/keywords/speech-analytics","display_name":"Speech analytics","score":0.6909007430076599},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6715967655181885},{"id":"https://openalex.org/keywords/automatic-summarization","display_name":"Automatic summarization","score":0.6404799222946167},{"id":"https://openalex.org/keywords/presentation","display_name":"Presentation (obstetrics)","score":0.6389727592468262},{"id":"https://openalex.org/keywords/speech-technology","display_name":"Speech technology","score":0.612832248210907},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.554812490940094},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.5446897745132446},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.5274945497512817},{"id":"https://openalex.org/keywords/speech-corpus","display_name":"Speech corpus","score":0.4715043604373932},{"id":"https://openalex.org/keywords/audio-mining","display_name":"Audio mining","score":0.44423770904541016},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.41326993703842163},{"id":"https://openalex.org/keywords/voice-activity-detection","display_name":"Voice activity detection","score":0.3949468433856964},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.2779567837715149},{"id":"https://openalex.org/keywords/history","display_name":"History","score":0.05888381600379944}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7655472755432129},{"id":"https://openalex.org/C54953205","wikidata":"https://www.wikidata.org/wiki/Q4142201","display_name":"Speech analytics","level":4,"score":0.6909007430076599},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6715967655181885},{"id":"https://openalex.org/C170858558","wikidata":"https://www.wikidata.org/wiki/Q1394144","display_name":"Automatic summarization","level":2,"score":0.6404799222946167},{"id":"https://openalex.org/C2777601897","wikidata":"https://www.wikidata.org/wiki/Q3409113","display_name":"Presentation (obstetrics)","level":2,"score":0.6389727592468262},{"id":"https://openalex.org/C504749915","wikidata":"https://www.wikidata.org/wiki/Q9010971","display_name":"Speech technology","level":3,"score":0.612832248210907},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.554812490940094},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.5446897745132446},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.5274945497512817},{"id":"https://openalex.org/C91863865","wikidata":"https://www.wikidata.org/wiki/Q4349497","display_name":"Speech corpus","level":3,"score":0.4715043604373932},{"id":"https://openalex.org/C157968479","wikidata":"https://www.wikidata.org/wiki/Q3079876","display_name":"Audio mining","level":4,"score":0.44423770904541016},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.41326993703842163},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.3949468433856964},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.2779567837715149},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.05888381600379944},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.0},{"id":"https://openalex.org/C126838900","wikidata":"https://www.wikidata.org/wiki/Q77604","display_name":"Radiology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/mmsp.2002.1203294","is_oa":false,"landing_page_url":"https://doi.org/10.1109/mmsp.2002.1203294","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2002 IEEE Workshop on Multimedia Signal Processing.","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":15,"referenced_works":["https://openalex.org/W200479330","https://openalex.org/W310645910","https://openalex.org/W1552981843","https://openalex.org/W1560013842","https://openalex.org/W1902712755","https://openalex.org/W2015895495","https://openalex.org/W2057713231","https://openalex.org/W2062149911","https://openalex.org/W2104173053","https://openalex.org/W2119820672","https://openalex.org/W2146871184","https://openalex.org/W3141224204","https://openalex.org/W4299681030","https://openalex.org/W6608047266","https://openalex.org/W6610854400"],"related_works":["https://openalex.org/W2355709873","https://openalex.org/W2337605147","https://openalex.org/W2036933852","https://openalex.org/W134179020","https://openalex.org/W1541790149","https://openalex.org/W1587401114","https://openalex.org/W2184127972","https://openalex.org/W4312668661","https://openalex.org/W642007152","https://openalex.org/W2218471654"],"abstract_inverted_index":{"How":[0],"to":[1,88,93],"recognize":[2],"and":[3,32,48,75],"understand":[4],"spontaneous":[5,68,96],"speech":[6,16,70,73,77,97],"is":[7],"one":[8],"of":[9,45,54,65],"the":[10,46,51,83],"most":[11,84],"important":[12,85],"issues":[13],"in":[14,36,38,91],"state-of-the-art":[15],"recognition":[17,98],"technology.":[18],"In":[19],"this":[20],"context,":[21],"a":[22],"five-year":[23],"large":[24],"scale":[25],"national":[26],"project":[27,47],"entitled":[28],"\"Spontaneous":[29],"speech:":[30],"corpus":[31],"processing":[33],"technology\"":[34],"started":[35],"Japan":[37],"1999.":[39],"This":[40],"paper":[41,80],"gives":[42],"an":[43],"overview":[44],"reports":[49],"on":[50],"major":[52],"results":[53],"experiments":[55],"that":[56],"have":[57],"been":[58],"conducted":[59],"so":[60],"far":[61],"at":[62],"Tokyo":[63],"Institute":[64],"Technology,":[66],"including":[67],"presentation":[69],"recognition,":[71],"automatic":[72],"summarization,":[74],"message-driven":[76],"recognition.":[78],"The":[79],"also":[81],"discusses":[82],"research":[86],"problems":[87],"be":[89],"solved":[90],"order":[92],"achieve":[94],"ultimate":[95],"systems.":[99]},"counts_by_year":[{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":2},{"year":2015,"cited_by_count":1},{"year":2012,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
