{"id":"https://openalex.org/W3172243058","doi":"https://doi.org/10.1109/taslp.2021.3084099","title":"Multimodal Representations for Synchronized Speech and Real-Time MRI Video Processing","display_name":"Multimodal Representations for Synchronized Speech and Real-Time MRI Video Processing","publication_year":2021,"publication_date":"2021-01-01","ids":{"openalex":"https://openalex.org/W3172243058","doi":"https://doi.org/10.1109/taslp.2021.3084099","mag":"3172243058"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2021.3084099","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2021.3084099","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5015639730","display_name":"\u00d6yk\u00fc Deniz K\u00f6se","orcid":"https://orcid.org/0000-0002-8685-2161"},"institutions":[{"id":"https://openalex.org/I4405392","display_name":"Bo\u011fazi\u00e7i University","ror":"https://ror.org/03z9tma90","country_code":"TR","type":"education","lineage":["https://openalex.org/I4405392"]}],"countries":["TR"],"is_corresponding":true,"raw_author_name":"Oyku Deniz Kose","raw_affiliation_strings":["Department of Electrical, and Electronics Engineering, Bogazici University, Istanbul, Turkey"],"affiliations":[{"raw_affiliation_string":"Department of Electrical, and Electronics Engineering, Bogazici University, Istanbul, Turkey","institution_ids":["https://openalex.org/I4405392"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5055086464","display_name":"Murat Sara\u00e7lar","orcid":"https://orcid.org/0000-0002-7435-8510"},"institutions":[{"id":"https://openalex.org/I4405392","display_name":"Bo\u011fazi\u00e7i University","ror":"https://ror.org/03z9tma90","country_code":"TR","type":"education","lineage":["https://openalex.org/I4405392"]}],"countries":["TR"],"is_corresponding":false,"raw_author_name":"Murat Saraclar","raw_affiliation_strings":["Department of Electrical, and Electronics Engineering, Bogazici University, Istanbul, Turkey"],"affiliations":[{"raw_affiliation_string":"Department of Electrical, and Electronics Engineering, Bogazici University, Istanbul, Turkey","institution_ids":["https://openalex.org/I4405392"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5015639730"],"corresponding_institution_ids":["https://openalex.org/I4405392"],"apc_list":null,"apc_paid":null,"fwci":1.2342,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.78959337,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":"29","issue":null,"first_page":"1912","last_page":"1924"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/timit","display_name":"TIMIT","score":0.8517493009567261},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8019273281097412},{"id":"https://openalex.org/keywords/phone","display_name":"Phone","score":0.7208056449890137},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6886312365531921},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5738131999969482},{"id":"https://openalex.org/keywords/sensor-fusion","display_name":"Sensor fusion","score":0.5014603137969971},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.49796533584594727},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4832661747932434},{"id":"https://openalex.org/keywords/pooling","display_name":"Pooling","score":0.4162519872188568},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.41059818863868713},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3760577440261841},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.35248667001724243},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.18290576338768005}],"concepts":[{"id":"https://openalex.org/C2778724510","wikidata":"https://www.wikidata.org/wiki/Q7670405","display_name":"TIMIT","level":3,"score":0.8517493009567261},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8019273281097412},{"id":"https://openalex.org/C2778707766","wikidata":"https://www.wikidata.org/wiki/Q202064","display_name":"Phone","level":2,"score":0.7208056449890137},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6886312365531921},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5738131999969482},{"id":"https://openalex.org/C33954974","wikidata":"https://www.wikidata.org/wiki/Q486494","display_name":"Sensor fusion","level":2,"score":0.5014603137969971},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.49796533584594727},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4832661747932434},{"id":"https://openalex.org/C70437156","wikidata":"https://www.wikidata.org/wiki/Q7228652","display_name":"Pooling","level":2,"score":0.4162519872188568},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.41059818863868713},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3760577440261841},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.35248667001724243},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.18290576338768005},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/taslp.2021.3084099","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2021.3084099","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities","score":0.5099999904632568},{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.4399999976158142}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":42,"referenced_works":["https://openalex.org/W1499800345","https://openalex.org/W1522301498","https://openalex.org/W1524333225","https://openalex.org/W1539811621","https://openalex.org/W1602485673","https://openalex.org/W1677182931","https://openalex.org/W1988550205","https://openalex.org/W2028971373","https://openalex.org/W2038420319","https://openalex.org/W2052382192","https://openalex.org/W2055098062","https://openalex.org/W2066381494","https://openalex.org/W2083792893","https://openalex.org/W2086261631","https://openalex.org/W2096391593","https://openalex.org/W2101816108","https://openalex.org/W2104997912","https://openalex.org/W2143612262","https://openalex.org/W2160006372","https://openalex.org/W2182927573","https://openalex.org/W2318168293","https://openalex.org/W2529316201","https://openalex.org/W2745364270","https://openalex.org/W2747580863","https://openalex.org/W2801248077","https://openalex.org/W2884768626","https://openalex.org/W2890563690","https://openalex.org/W2912844525","https://openalex.org/W2936504074","https://openalex.org/W2938626520","https://openalex.org/W2964121744","https://openalex.org/W2964227577","https://openalex.org/W2972691962","https://openalex.org/W2986592617","https://openalex.org/W3015578284","https://openalex.org/W3095345107","https://openalex.org/W3098280677","https://openalex.org/W6631190155","https://openalex.org/W6631362777","https://openalex.org/W6635966985","https://openalex.org/W6675604347","https://openalex.org/W6685993629"],"related_works":["https://openalex.org/W80423236","https://openalex.org/W2953234277","https://openalex.org/W2626256601","https://openalex.org/W2900413183","https://openalex.org/W4390975304","https://openalex.org/W147410782","https://openalex.org/W3022252430","https://openalex.org/W4287804464","https://openalex.org/W1993406137","https://openalex.org/W2038050265"],"abstract_inverted_index":{"Representations":[0],"for":[1,27,186,228],"data":[2,8,41,50,59,105,122,130,155],"subunits":[3],"can":[4],"help":[5,236],"with":[6,48,147,234],"recent":[7],"accumulation":[9],"by":[10],"enabling":[11],"efficient":[12],"storage":[13],"and":[14,30,71,76,93,141,169,199,225],"retrieval":[15],"systems.":[16],"In":[17,159],"this":[18,223],"paper,":[19],"we":[20,206],"investigate":[21],"the":[22,54,72,83,121,154,162,187,195,235],"problem":[23],"of":[24,38,237],"representation":[25],"generation":[26],"phone":[28,84,164,176,188,196],"classification":[29,85,165,189],"cross-modal":[31],"same-different":[32,210,230],"word":[33,211],"discrimination":[34,212],"tasks.":[35],"The":[36,110],"benefits":[37],"utilizing":[39],"multimodal":[40,170],"on":[42,78,179,214],"these":[43,94],"tasks":[44],"are":[45,91,97,172,191],"examined":[46],"together":[47],"different":[49,58,101,167,219],"fusion":[51,106,131,143],"schemes.":[52],"Mainly,":[53],"paper":[55],"considers":[56],"two":[57,87,100,108,218],"modalities,":[60],"upper":[61],"airway":[62],"mid-sagittal":[63],"plane":[64],"real-time":[65],"magnetic":[66],"resonance":[67],"imaging":[68],"(rtMRI)":[69],"videos":[70],"corresponding":[73],"speech":[74],"waveforms,":[75],"experiments":[77],"USC-TIMIT":[79,180],"rtMRI":[80,181],"dataset.":[81,182],"For":[82],"task,":[86,198,224],"unimodal":[88,139,168],"neural":[89],"networks":[90,112],"designed,":[92],"separate":[95],"systems":[96,171],"merged":[98],"in":[99,114,117,144,156,194],"ways":[102],"that":[103,129,227],"provide":[104],"between":[107],"modalities.":[109],"proposed":[111,163],"differ":[113],"their":[115,200],"stages":[116,146],"which":[118],"they":[119],"perform":[120],"fusion.":[123],"As":[124],"hypothesized,":[125],"our":[126],"results":[127,151,178],"show":[128],"indeed":[132],"brings":[133],"a":[134,208,243],"performance":[135],"improvement":[136],"over":[137],"both":[138],"approaches,":[140],"performing":[142],"earlier":[145],"cross-connections":[148],"yields":[149,239],"better":[150,240],"than":[152,242],"fusing":[153],"later":[157],"stages.":[158],"addition":[160],"to":[161,174,221],"schemes,":[166],"designed":[173],"obtain":[175],"recognition":[177,197],"Phone":[183],"representations":[184,233],"generated":[185],"task":[190,213],"also":[192],"utilized":[193],"representative":[201],"power":[202],"is":[203],"illustrated.":[204],"Finally,":[205],"define":[207],"cross-view":[209,229],"USC-TIMIT.":[215],"We":[216],"propose":[217],"schemes":[220],"tackle":[222],"find":[226],"discrimination,":[231],"generating":[232],"cross-modality":[238],"accuracy":[241],"system":[244],"employing":[245],"independently":[246],"created":[247],"representations.":[248]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":1}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
