{"id":"https://openalex.org/W2608612081","doi":"https://doi.org/10.21437/interspeech.2017-1689","title":"Learning Weakly Supervised Multimodal Phoneme Embeddings","display_name":"Learning Weakly Supervised Multimodal Phoneme Embeddings","publication_year":2017,"publication_date":"2017-08-16","ids":{"openalex":"https://openalex.org/W2608612081","doi":"https://doi.org/10.21437/interspeech.2017-1689","mag":"2608612081"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2017-1689","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2017-1689","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2017","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1704.06913","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5022256197","display_name":"Rahma Chaabouni","orcid":"https://orcid.org/0000-0002-9196-1397"},"institutions":[{"id":"https://openalex.org/I4210151031","display_name":"Laboratoire de Sciences Cognitives et Psycholinguistique","ror":"https://ror.org/05fvhm231","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I1294671590","https://openalex.org/I2746051580","https://openalex.org/I29607241","https://openalex.org/I4210096427","https://openalex.org/I4210151031","https://openalex.org/I90669466"]}],"countries":["FR"],"is_corresponding":true,"raw_author_name":"Rahma Chaabouni","raw_affiliation_strings":["Laboratoire de sciences cognitives et psycholinguistique"],"affiliations":[{"raw_affiliation_string":"Laboratoire de sciences cognitives et psycholinguistique","institution_ids":["https://openalex.org/I4210151031"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069239428","display_name":"Ewan Dunbar","orcid":"https://orcid.org/0000-0001-9603-953X"},"institutions":[{"id":"https://openalex.org/I90669466","display_name":"\u00c9cole des hautes \u00e9tudes en sciences sociales","ror":"https://ror.org/02d9dg697","country_code":"FR","type":"facility","lineage":["https://openalex.org/I90669466"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Ewan Dunbar","raw_affiliation_strings":["\u00c9cole des hautes \u00e9tudes en sciences sociales"],"affiliations":[{"raw_affiliation_string":"\u00c9cole des hautes \u00e9tudes en sciences sociales","institution_ids":["https://openalex.org/I90669466"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047639590","display_name":"Neil Zeghidour","orcid":"https://orcid.org/0000-0001-6896-3987"},"institutions":[{"id":"https://openalex.org/I277688954","display_name":"Universit\u00e9 Paris-Saclay","ror":"https://ror.org/03xjwb503","country_code":"FR","type":"education","lineage":["https://openalex.org/I277688954"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Neil Zeghidour","raw_affiliation_strings":["Universit\u00e9 Paris-Saclay"],"affiliations":[{"raw_affiliation_string":"Universit\u00e9 Paris-Saclay","institution_ids":["https://openalex.org/I277688954"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5007620149","display_name":"Emmanuel Dupoux","orcid":"https://orcid.org/0000-0002-7814-2952"},"institutions":[{"id":"https://openalex.org/I4210151031","display_name":"Laboratoire de Sciences Cognitives et Psycholinguistique","ror":"https://ror.org/05fvhm231","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I1294671590","https://openalex.org/I2746051580","https://openalex.org/I29607241","https://openalex.org/I4210096427","https://openalex.org/I4210151031","https://openalex.org/I90669466"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Emmanuel Dupoux","raw_affiliation_strings":["Laboratoire de sciences cognitives et psycholinguistique"],"affiliations":[{"raw_affiliation_string":"Laboratoire de sciences cognitives et psycholinguistique","institution_ids":["https://openalex.org/I4210151031"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5022256197"],"corresponding_institution_ids":["https://openalex.org/I4210151031"],"apc_list":null,"apc_paid":null,"fwci":0.5603,"has_fulltext":true,"cited_by_count":3,"citation_normalized_percentile":{"value":0.63764545,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"2218","last_page":"2222"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12032","display_name":"Multisensory perception and integration","score":0.9955999851226807,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/concatenation","display_name":"Concatenation (mathematics)","score":0.8050025701522827},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.7424142360687256},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7253249883651733},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6384642124176025},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6294625997543335},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5450050234794617},{"id":"https://openalex.org/keywords/phone","display_name":"Phone","score":0.5200024247169495},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5145484805107117},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.4854072034358978},{"id":"https://openalex.org/keywords/articulation","display_name":"Articulation (sociology)","score":0.4594586193561554},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.44235700368881226},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.43467336893081665},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3994659185409546},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.10392296314239502},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.07006293535232544}],"concepts":[{"id":"https://openalex.org/C87619178","wikidata":"https://www.wikidata.org/wiki/Q126002","display_name":"Concatenation (mathematics)","level":2,"score":0.8050025701522827},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.7424142360687256},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7253249883651733},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6384642124176025},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6294625997543335},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5450050234794617},{"id":"https://openalex.org/C2778707766","wikidata":"https://www.wikidata.org/wiki/Q202064","display_name":"Phone","level":2,"score":0.5200024247169495},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5145484805107117},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.4854072034358978},{"id":"https://openalex.org/C2779337067","wikidata":"https://www.wikidata.org/wiki/Q4800961","display_name":"Articulation (sociology)","level":3,"score":0.4594586193561554},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.44235700368881226},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.43467336893081665},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3994659185409546},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.10392296314239502},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.07006293535232544},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.21437/interspeech.2017-1689","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2017-1689","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2017","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1704.06913","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1704.06913","pdf_url":"https://arxiv.org/pdf/1704.06913","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:HAL:hal-01687415v1","is_oa":false,"landing_page_url":"https://inria.hal.science/hal-01687415","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Interspeech 2017, 2017, Stockholm, Sweden. &#x27E8;10.21437/Interspeech.2017-1689&#x27E9;","raw_type":"Conference papers"},{"id":"doi:10.48550/arxiv.1704.06913","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1704.06913","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"},{"id":"mag:2608612081","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":null}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1704.06913","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1704.06913","pdf_url":"https://arxiv.org/pdf/1704.06913","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"score":0.7200000286102295,"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2608612081.pdf","grobid_xml":"https://content.openalex.org/works/W2608612081.grobid-xml"},"referenced_works_count":23,"referenced_works":["https://openalex.org/W124086577","https://openalex.org/W1523385540","https://openalex.org/W1531883353","https://openalex.org/W1577418252","https://openalex.org/W1769974409","https://openalex.org/W2015394094","https://openalex.org/W2021279213","https://openalex.org/W2048616781","https://openalex.org/W2076462394","https://openalex.org/W2121836097","https://openalex.org/W2127589108","https://openalex.org/W2133564696","https://openalex.org/W2190506272","https://openalex.org/W2285716245","https://openalex.org/W2395899413","https://openalex.org/W2400549570","https://openalex.org/W2404799143","https://openalex.org/W2516890051","https://openalex.org/W2550980560","https://openalex.org/W2615059019","https://openalex.org/W2740606810","https://openalex.org/W2952746495","https://openalex.org/W3097096317"],"related_works":["https://openalex.org/W2963583362","https://openalex.org/W3015356123","https://openalex.org/W3040260790","https://openalex.org/W3099638501","https://openalex.org/W3123226376","https://openalex.org/W3032892481","https://openalex.org/W3207628370","https://openalex.org/W2963902314","https://openalex.org/W3017923487","https://openalex.org/W3197567540","https://openalex.org/W2171651891","https://openalex.org/W3162391496","https://openalex.org/W3094123278","https://openalex.org/W3209871323","https://openalex.org/W2786051023","https://openalex.org/W3136644942","https://openalex.org/W3159929162","https://openalex.org/W2991487804","https://openalex.org/W3201143670","https://openalex.org/W2973062889"],"abstract_inverted_index":{"Recent":[0],"works":[1],"have":[2],"explored":[3],"deep":[4],"architectures":[5],"for":[6,71,85,91,130],"learning":[7,96,113,127],"multimodal":[8,133],"speech":[9,30,87],"representation":[10,70],"(e.g.":[11],"audio":[12,33,188],"and":[13,16,34,49,82,88,132,151],"images,":[14],"articulation":[15],"audio)":[17],"in":[18,41,74,98,175],"a":[19,42,68,75,100,143],"supervised":[20,44,77],"way.":[21],"Here":[22],"we":[23,56,141],"investigate":[24],"the":[25,38,64,104,107,111,147,159],"role":[26],"of":[27,106,118,146,161,172],"combining":[28],"different":[29,116],"modalities,":[31,109],"i.e.":[32],"visual":[35,89,131,155],"information":[36],"representing":[37],"lips":[39],"movements,":[40],"weakly":[43,76],"way":[45],"using":[46],"Siamese":[47,101],"networks":[48],"lexical":[50],"same-different":[51],"side":[52],"information.":[53],"In":[54],"particular,":[55],"ask":[57],"whether":[58],"one":[59],"modality":[60],"can":[61,157],"benefit":[62],"from":[63],"other":[65],"to":[66,180],"provide":[67],"richer":[69],"phone":[72,92,149],"recognition":[73],"setting.":[78],"We":[79,123],"introduce":[80],"mono-task":[81,95],"multi-task":[83,112,126],"methods":[84],"merging":[86],"modalities":[90,119],"recognition.":[93],"The":[94],"consists":[97],"applying":[99],"network":[102],"on":[103,187],"concatenation":[105],"two":[108],"while":[110,135],"receives":[114],"several":[115],"combinations":[117],"at":[120],"train":[121],"time.":[122],"show":[124,152],"that":[125,153,177],"enhances":[128],"discriminability":[129,160],"inputs":[134],"minimally":[136],"impacting":[137],"auditory":[138],"inputs.":[139],"Furthermore,":[140],"present":[142],"qualitative":[144],"analysis":[145],"obtained":[148],"embeddings,":[150],"cross-modal":[154],"input":[156],"improve":[158],"phonological":[162],"features":[163,183],"which":[164],"are":[165,178],"visually":[166],"discernable":[167],"(rounding,":[168],"open/close,":[169],"labial":[170],"place":[171],"articulation),":[173],"resulting":[174],"representations":[176],"closer":[179],"abstract":[181],"linguistic":[182],"than":[184],"those":[185],"based":[186],"only.":[189]},"counts_by_year":[{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":1}],"updated_date":"2026-03-10T16:38:18.471706","created_date":"2025-10-10T00:00:00"}
