{"id":"https://openalex.org/W206460231","doi":"https://doi.org/10.21437/eurospeech.1997-116","title":"Normalization of speaker variability by spectrum warping for robust speech recognition","display_name":"Normalization of speaker variability by spectrum warping for robust speech recognition","publication_year":1997,"publication_date":"1997-09-22","ids":{"openalex":"https://openalex.org/W206460231","doi":"https://doi.org/10.21437/eurospeech.1997-116","mag":"206460231"},"language":"en","primary_location":{"id":"doi:10.21437/eurospeech.1997-116","is_oa":false,"landing_page_url":"https://doi.org/10.21437/eurospeech.1997-116","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"5th European Conference on Speech Communication and Technology (Eurospeech 1997)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5109698956","display_name":"Yunji Chu","orcid":"https://orcid.org/0009-0003-8348-1312"},"institutions":[{"id":"https://openalex.org/I1329325741","display_name":"Philips (Finland)","ror":"https://ror.org/01g4jev56","country_code":"FI","type":"company","lineage":["https://openalex.org/I1329325741","https://openalex.org/I4210122849"]}],"countries":["FI"],"is_corresponding":true,"raw_author_name":"Y.C. Chu","raw_affiliation_strings":["PHILIPS#TAB#"],"affiliations":[{"raw_affiliation_string":"PHILIPS#TAB#","institution_ids":["https://openalex.org/I1329325741"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061817307","display_name":"Charlie Jie","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Charlie Jie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073667214","display_name":"Vincent Tung","orcid":"https://orcid.org/0000-0003-3230-0932"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Vincent Tung","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101954491","display_name":"Ben Lin","orcid":"https://orcid.org/0009-0008-8401-3699"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ben Lin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5100616648","display_name":"Richard Lee","orcid":"https://orcid.org/0000-0002-2397-0443"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Richard Lee","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5109698956"],"corresponding_institution_ids":["https://openalex.org/I1329325741"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.02869156,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1127","last_page":"1130"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10901","display_name":"Advanced Data Compression Techniques","score":0.9894999861717224,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/normalization","display_name":"Normalization (sociology)","score":0.8315657377243042},{"id":"https://openalex.org/keywords/image-warping","display_name":"Image warping","score":0.8130820989608765},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7540125846862793},{"id":"https://openalex.org/keywords/filter-bank","display_name":"Filter bank","score":0.6850966215133667},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6752235889434814},{"id":"https://openalex.org/keywords/resampling","display_name":"Resampling","score":0.6603717803955078},{"id":"https://openalex.org/keywords/dynamic-time-warping","display_name":"Dynamic time warping","score":0.5368998050689697},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.5228170156478882},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5074822306632996},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5026347637176514},{"id":"https://openalex.org/keywords/scaling","display_name":"Scaling","score":0.4735538065433502},{"id":"https://openalex.org/keywords/filter","display_name":"Filter (signal processing)","score":0.3834002614021301},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.22121447324752808},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.1916956603527069}],"concepts":[{"id":"https://openalex.org/C136886441","wikidata":"https://www.wikidata.org/wiki/Q926129","display_name":"Normalization (sociology)","level":2,"score":0.8315657377243042},{"id":"https://openalex.org/C157202957","wikidata":"https://www.wikidata.org/wiki/Q1659609","display_name":"Image warping","level":2,"score":0.8130820989608765},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7540125846862793},{"id":"https://openalex.org/C100515483","wikidata":"https://www.wikidata.org/wiki/Q3268235","display_name":"Filter bank","level":3,"score":0.6850966215133667},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6752235889434814},{"id":"https://openalex.org/C150921843","wikidata":"https://www.wikidata.org/wiki/Q1170431","display_name":"Resampling","level":2,"score":0.6603717803955078},{"id":"https://openalex.org/C88516994","wikidata":"https://www.wikidata.org/wiki/Q1268863","display_name":"Dynamic time warping","level":2,"score":0.5368998050689697},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.5228170156478882},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5074822306632996},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5026347637176514},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.4735538065433502},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.3834002614021301},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.22121447324752808},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.1916956603527069},{"id":"https://openalex.org/C19165224","wikidata":"https://www.wikidata.org/wiki/Q23404","display_name":"Anthropology","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/eurospeech.1997-116","is_oa":false,"landing_page_url":"https://doi.org/10.21437/eurospeech.1997-116","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"5th European Conference on Speech Communication and Technology (Eurospeech 1997)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.44999998807907104}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":6,"referenced_works":["https://openalex.org/W2069501481","https://openalex.org/W2104924374","https://openalex.org/W2106119541","https://openalex.org/W2127730755","https://openalex.org/W2140567543","https://openalex.org/W2165871906"],"related_works":["https://openalex.org/W1670332068","https://openalex.org/W2095618524","https://openalex.org/W2347413598","https://openalex.org/W2330863229","https://openalex.org/W71572444","https://openalex.org/W1997383766","https://openalex.org/W2154472250","https://openalex.org/W2350336482","https://openalex.org/W3009759344","https://openalex.org/W263057764"],"abstract_inverted_index":{"This":[0],"paper":[1],"examines":[2],"techniques":[3],"for":[4,30,55,108],"normalization":[5],"of":[6,13,73,85,99],"unseen":[7,74,109],"speakers":[8,75],"in":[9,69],"recognition.":[10],"Two":[11],"implementations":[12],"linear":[14],"spectrum":[15,43],"warping":[16,44,56,92],"were":[17],"examined:":[18],"time":[19],"domain":[20],"resampling":[21],"and":[22],"filter":[23,46,86],"bank":[24,47,87],"scaling.":[25],"It":[26],"is":[27,64],"shown":[28,65],"that":[29],"seen":[31],"speakers,":[32],"the":[33,71,78,83,90,96],"models":[34],"trained":[35],"by":[36,45,50,104],"unwarped":[37],"utterances":[38],"are":[39],"less":[40],"sensitive":[41],"to":[42,66,77],"scaling":[48,88],"than":[49,106],"resampling.":[51],"A":[52],"pitch-based":[53,91],"scheme":[54],"factor":[57,93],"estimation":[58,94],"has":[59],"been":[60],"proposed.":[61],"The":[62],"method":[63],"be":[67],"cost-effective":[68],"reducing":[70],"variability":[72],"compared":[76],"ML-based":[79],"methods.":[80],"In":[81],"particular":[82],"combination":[84],"with":[89],"reduces":[95],"error":[97],"rate":[98],"isolated":[100],"Mandarin":[101],"digit":[102],"recognition":[103],"more":[105],"30%":[107],"speakers.":[110]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
