{"id":"https://openalex.org/W3015578284","doi":"https://doi.org/10.1109/icassp40776.2020.9053322","title":"Recurrent Neural Audiovisual Word Embeddings for Synchronized Speech and Real-Time Mri","display_name":"Recurrent Neural Audiovisual Word Embeddings for Synchronized Speech and Real-Time Mri","publication_year":2020,"publication_date":"2020-04-09","ids":{"openalex":"https://openalex.org/W3015578284","doi":"https://doi.org/10.1109/icassp40776.2020.9053322","mag":"3015578284"},"language":"en","primary_location":{"id":"doi:10.1109/icassp40776.2020.9053322","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp40776.2020.9053322","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5015639730","display_name":"\u00d6yk\u00fc Deniz K\u00f6se","orcid":"https://orcid.org/0000-0002-8685-2161"},"institutions":[{"id":"https://openalex.org/I4405392","display_name":"Bo\u011fazi\u00e7i University","ror":"https://ror.org/03z9tma90","country_code":"TR","type":"education","lineage":["https://openalex.org/I4405392"]}],"countries":["TR"],"is_corresponding":true,"raw_author_name":"Oyku Deniz Kose","raw_affiliation_strings":["Department of Electrical and Electronics Engineering, Bogazici University, Istanbul, Turkey"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Electronics Engineering, Bogazici University, Istanbul, Turkey","institution_ids":["https://openalex.org/I4405392"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5055086464","display_name":"Murat Sara\u00e7lar","orcid":"https://orcid.org/0000-0002-7435-8510"},"institutions":[{"id":"https://openalex.org/I4405392","display_name":"Bo\u011fazi\u00e7i University","ror":"https://ror.org/03z9tma90","country_code":"TR","type":"education","lineage":["https://openalex.org/I4405392"]}],"countries":["TR"],"is_corresponding":false,"raw_author_name":"Murat Saraclar","raw_affiliation_strings":["Department of Electrical and Electronics Engineering, Bogazici University, Istanbul, Turkey"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Electronics Engineering, Bogazici University, Istanbul, Turkey","institution_ids":["https://openalex.org/I4405392"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5015639730"],"corresponding_institution_ids":["https://openalex.org/I4405392"],"apc_list":null,"apc_paid":null,"fwci":0.1515,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.40078999,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":93},"biblio":{"volume":null,"issue":null,"first_page":"6424","last_page":"6428"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7935662269592285},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.7298797965049744},{"id":"https://openalex.org/keywords/recurrent-neural-network","display_name":"Recurrent neural network","score":0.7239089012145996},{"id":"https://openalex.org/keywords/dynamic-time-warping","display_name":"Dynamic time warping","score":0.6886262893676758},{"id":"https://openalex.org/keywords/margin","display_name":"Margin (machine learning)","score":0.5726130604743958},{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.5684300661087036},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.567116916179657},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5433067679405212},{"id":"https://openalex.org/keywords/timit","display_name":"TIMIT","score":0.4898695647716522},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.46008017659187317},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4583214819431305},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.44656071066856384},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.42554765939712524},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.38309168815612793},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3697003722190857},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.2572195529937744},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.1490771770477295},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.09621450304985046}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7935662269592285},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.7298797965049744},{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.7239089012145996},{"id":"https://openalex.org/C88516994","wikidata":"https://www.wikidata.org/wiki/Q1268863","display_name":"Dynamic time warping","level":2,"score":0.6886262893676758},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.5726130604743958},{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.5684300661087036},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.567116916179657},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5433067679405212},{"id":"https://openalex.org/C2778724510","wikidata":"https://www.wikidata.org/wiki/Q7670405","display_name":"TIMIT","level":3,"score":0.4898695647716522},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.46008017659187317},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4583214819431305},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.44656071066856384},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.42554765939712524},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.38309168815612793},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3697003722190857},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2572195529937744},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.1490771770477295},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.09621450304985046},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp40776.2020.9053322","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp40776.2020.9053322","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10","score":0.5199999809265137},{"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.46000000834465027}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":46,"referenced_works":["https://openalex.org/W1496120315","https://openalex.org/W1499800345","https://openalex.org/W1562955078","https://openalex.org/W2028971373","https://openalex.org/W2059652594","https://openalex.org/W2066381494","https://openalex.org/W2096733369","https://openalex.org/W2101816108","https://openalex.org/W2129625650","https://openalex.org/W2137010615","https://openalex.org/W2138621090","https://openalex.org/W2160006372","https://openalex.org/W2170738476","https://openalex.org/W2182927573","https://openalex.org/W2190506272","https://openalex.org/W2291770225","https://openalex.org/W2318168293","https://openalex.org/W2402610474","https://openalex.org/W2550241133","https://openalex.org/W2556930864","https://openalex.org/W2566587499","https://openalex.org/W2801248077","https://openalex.org/W2890563690","https://openalex.org/W2912844525","https://openalex.org/W2936504074","https://openalex.org/W2951216052","https://openalex.org/W2951359136","https://openalex.org/W2962736743","https://openalex.org/W2962753610","https://openalex.org/W2962862718","https://openalex.org/W2962978519","https://openalex.org/W2963115079","https://openalex.org/W2963571336","https://openalex.org/W2964222437","https://openalex.org/W3099206234","https://openalex.org/W6633682082","https://openalex.org/W6665204316","https://openalex.org/W6679376003","https://openalex.org/W6680628865","https://openalex.org/W6685160515","https://openalex.org/W6685993629","https://openalex.org/W6713398208","https://openalex.org/W6729855024","https://openalex.org/W6729977899","https://openalex.org/W6731763572","https://openalex.org/W6747045456"],"related_works":["https://openalex.org/W2994894110","https://openalex.org/W4377862891","https://openalex.org/W2005708641","https://openalex.org/W3009759344","https://openalex.org/W1984867078","https://openalex.org/W80018097","https://openalex.org/W2050397613","https://openalex.org/W2100729928","https://openalex.org/W2964207879","https://openalex.org/W2080458333"],"abstract_inverted_index":{"In":[0,23,60,97],"this":[1,24],"paper,":[2],"the":[3,9,41,47,64,74,84,89,98,129,137,142,156],"use":[4],"of":[5,40,109,124],"word":[6,26,65,158],"embeddings":[7,27,66,116,159],"for":[8,54,67,73,118],"segments":[10],"found":[11],"in":[12],"audio":[13],"and":[14,32,36,57],"real-time":[15],"magnetic":[16],"resonance":[17],"imaging":[18],"(rtMRI)":[19],"videos":[20],"is":[21,44,52,81,112,131],"addressed.":[22],"study,":[25],"are":[28,91],"created":[29],"to":[30,62,88,114,128],"store":[31],"retrieve":[33],"data":[34,43,70,108,120],"efficiently,":[35],"their":[37],"representation":[38],"power":[39],"original":[42],"evaluated":[45],"by":[46,149],"same-different":[48],"word-discrimination":[49],"task":[50],"that":[51,155],"defined":[53],"both":[55,119],"unimodal":[56,75,166],"cross-view":[58,99,168],"settings.":[59],"order":[61],"create":[63],"two":[68],"different":[69,110],"modalities":[71],"independently":[72],"setting,":[76,100],"a":[77,94,101,150,162],"Siamese":[78],"neural":[79,103],"network":[80,90,104],"designed.":[82],"For":[83],"rtMRI":[85,139],"videos,":[86],"inputs":[87,107],"generated":[92],"through":[93],"correspondence":[95],"autoencoder.":[96],"recurrent":[102],"(RNN),":[105],"which":[106],"modalities,":[111],"trained":[113],"generate":[115],"jointly":[117],"sources.":[121],"The":[122,134],"problem":[123],"objective":[125],"function":[126],"selection":[127],"RNN":[130],"also":[132],"investigated.":[133],"results":[135],"on":[136],"USC-TIMIT":[138],"dataset":[140],"outperform":[141],"conventional":[143],"dynamic":[144],"time":[145],"warping":[146],"(DTW)":[147],"baseline":[148],"clear":[151],"margin.":[152],"Outcomes":[153],"demonstrate":[154],"proposed":[157],"can":[160],"be":[161],"step":[163],"towards":[164],"faster":[165],"or":[167],"queryby-example":[169],"search":[170],"tasks.":[171]},"counts_by_year":[{"year":2021,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
