{"id":"https://openalex.org/W2884768626","doi":"https://doi.org/10.21437/interspeech.2018-2537","title":"Towards Automatic Speech Identification from Vocal Tract Shape Dynamics in Real-time MRI","display_name":"Towards Automatic Speech Identification from Vocal Tract Shape Dynamics in Real-time MRI","publication_year":2018,"publication_date":"2018-08-28","ids":{"openalex":"https://openalex.org/W2884768626","doi":"https://doi.org/10.21437/interspeech.2018-2537","mag":"2884768626"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2018-2537","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2018-2537","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2018","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5109583051","display_name":"Pramit Saha","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Pramit Saha","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003087094","display_name":"Praneeth Srungarapu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Praneeth Srungarapu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5071585967","display_name":"Sidney Fels","orcid":"https://orcid.org/0000-0001-9279-9021"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sidney Fels","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5109583051"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.1469,"has_fulltext":false,"cited_by_count":16,"citation_normalized_percentile":{"value":0.88377644,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1249","last_page":"1253"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9947999715805054,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9865000247955322,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/vocal-tract","display_name":"Vocal tract","score":0.9219273924827576},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.767341136932373},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6914645433425903},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5668551325798035},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.531732976436615},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4908338189125061},{"id":"https://openalex.org/keywords/vowel","display_name":"Vowel","score":0.48695123195648193},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.4806560277938843},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.42748355865478516},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3219619691371918}],"concepts":[{"id":"https://openalex.org/C47401133","wikidata":"https://www.wikidata.org/wiki/Q748953","display_name":"Vocal tract","level":2,"score":0.9219273924827576},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.767341136932373},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6914645433425903},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5668551325798035},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.531732976436615},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4908338189125061},{"id":"https://openalex.org/C2779581591","wikidata":"https://www.wikidata.org/wiki/Q36244","display_name":"Vowel","level":2,"score":0.48695123195648193},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.4806560277938843},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.42748355865478516},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3219619691371918},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2018-2537","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2018-2537","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2018","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5400000214576721,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2079194684","https://openalex.org/W2617269004","https://openalex.org/W35292311","https://openalex.org/W2803166627","https://openalex.org/W2046073792","https://openalex.org/W2050311283","https://openalex.org/W2040757176","https://openalex.org/W153239700","https://openalex.org/W2067459736","https://openalex.org/W2105635394"],"abstract_inverted_index":{"Vocal":[0],"tract":[1,48,130],"configurations":[2],"play":[3],"a":[4,41,82,86,111,121,161],"vital":[5],"role":[6],"in":[7,21,164,168],"generating":[8],"distinguishable":[9],"speech":[10,22,37,172],"sounds,":[11],"by":[12,135],"modulating":[13],"the":[14,35,78,100,107,158,165,169],"airflow":[15],"and":[16,150],"creating":[17],"different":[18,70],"resonant":[19],"cavities":[20],"production.":[23],"They":[24],"contain":[25],"abundant":[26],"information":[27],"that":[28,97],"can":[29],"be":[30],"utilized":[31],"to":[32,51,68,105,176],"better":[33],"understand":[34],"underlying":[36],"production":[38],"mechanism.":[39],"As":[40],"step":[42],"towards":[43],"automatic":[44],"mapping":[45],"of":[46,77,110,124,128,141,144,171],"vocal":[47,79,129],"shape":[49],"geometry":[50],"acoustics,":[52],"this":[53,142],"paper":[54],"employs":[55],"effective":[56],"video":[57,113,116,180],"action":[58],"recognition":[59],"techniques,":[60],"like":[61],"Long-term":[62],"Recurrent":[63,95],"Convolutional":[64],"Networks":[65],"(LRCN)":[66],"models,":[67],"identify":[69],"vowel-consonant-vowel":[71],"(VCV)":[72],"sequences":[73],"from":[74],"dynamic":[75],"shaping":[76,131],"tract.":[80],"Such":[81],"model":[83,166],"typically":[84],"combines":[85],"CNN":[87],"based":[88],"deep":[89,103],"hierarchical":[90],"visual":[91],"feature":[92],"extractor":[93],"with":[94,174],"Networks,":[96],"ideally":[98],"makes":[99],"network":[101],"spatio-temporally":[102],"enough":[104],"learn":[106],"sequential":[108],"dynamics":[109],"short":[112],"clip":[114],"for":[115,151],"classification":[117,153,173,181],"tasks.":[118,182],"We":[119],"use":[120],"database":[122],"consisting":[123],"2D":[125],"real-time":[126],"MRI":[127],"during":[132],"VCV":[133],"utterances":[134],"17":[136],"speakers.":[137],"The":[138],"comparative":[139],"performances":[140],"class":[143],"algorithms":[145],"under":[146],"various":[147,152],"parameter":[148],"settings":[149],"tasks":[154],"are":[155],"discussed.":[156],"Interestingly,":[157],"results":[159],"show":[160],"marked":[162],"difference":[163],"performance":[167],"context":[170],"respect":[175],"generic":[177],"sequence":[178],"or":[179]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":7},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
