{"id":"https://openalex.org/W2625455707","doi":"https://doi.org/10.18653/v1/k17-1037","title":"Encoding of phonology in a recurrent neural model of grounded speech","display_name":"Encoding of phonology in a recurrent neural model of grounded speech","publication_year":2017,"publication_date":"2017-01-01","ids":{"openalex":"https://openalex.org/W2625455707","doi":"https://doi.org/10.18653/v1/k17-1037","mag":"2625455707"},"language":"en","primary_location":{"id":"doi:10.18653/v1/k17-1037","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/k17-1037","pdf_url":"https://www.aclweb.org/anthology/K17-1037.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 21st Conference on Computational Natural Language\n          Learning (CoNLL 2017)","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.aclweb.org/anthology/K17-1037.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Afra Alishahi","orcid":null},"institutions":[{"id":"https://openalex.org/I193700539","display_name":"Tilburg University","ror":"https://ror.org/04b8v1s79","country_code":"NL","type":"education","lineage":["https://openalex.org/I193700539"]}],"countries":["NL"],"is_corresponding":true,"raw_author_name":"Afra Alishahi","raw_affiliation_strings":["Tilburg University"],"affiliations":[{"raw_affiliation_string":"Tilburg University","institution_ids":["https://openalex.org/I193700539"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Marie Barking","orcid":null},"institutions":[{"id":"https://openalex.org/I193700539","display_name":"Tilburg University","ror":"https://ror.org/04b8v1s79","country_code":"NL","type":"education","lineage":["https://openalex.org/I193700539"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Marie Barking","raw_affiliation_strings":["Tilburg University"],"affiliations":[{"raw_affiliation_string":"Tilburg University","institution_ids":["https://openalex.org/I193700539"]}]},{"author_position":"last","author":{"id":null,"display_name":"Grzegorz Chrupa\u0142a","orcid":null},"institutions":[{"id":"https://openalex.org/I193700539","display_name":"Tilburg University","ror":"https://ror.org/04b8v1s79","country_code":"NL","type":"education","lineage":["https://openalex.org/I193700539"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Grzegorz Chrupa\u0142a","raw_affiliation_strings":["Tilburg University"],"affiliations":[{"raw_affiliation_string":"Tilburg University","institution_ids":["https://openalex.org/I193700539"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I193700539"],"apc_list":null,"apc_paid":null,"fwci":1.8691,"has_fulltext":true,"cited_by_count":19,"citation_normalized_percentile":{"value":0.86856674,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"368","last_page":"378"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10403","display_name":"Phonetics and Phonology Research","score":0.9950000047683716,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/utterance","display_name":"Utterance","score":0.5986999869346619},{"id":"https://openalex.org/keywords/recurrent-neural-network","display_name":"Recurrent neural network","score":0.597000002861023},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.5702000260353088},{"id":"https://openalex.org/keywords/phonology","display_name":"Phonology","score":0.5295000076293945},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.5128999948501587},{"id":"https://openalex.org/keywords/salient","display_name":"Salient","score":0.4871000051498413},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.421099990606308},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.36410000920295715}],"concepts":[{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6972000002861023},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6671000123023987},{"id":"https://openalex.org/C2775852435","wikidata":"https://www.wikidata.org/wiki/Q258403","display_name":"Utterance","level":2,"score":0.5986999869346619},{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.597000002861023},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.5702000260353088},{"id":"https://openalex.org/C148934300","wikidata":"https://www.wikidata.org/wiki/Q40998","display_name":"Phonology","level":2,"score":0.5295000076293945},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.5128999948501587},{"id":"https://openalex.org/C2780719617","wikidata":"https://www.wikidata.org/wiki/Q1030752","display_name":"Salient","level":2,"score":0.4871000051498413},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.461899995803833},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.421099990606308},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3752000033855438},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.36410000920295715},{"id":"https://openalex.org/C151989614","wikidata":"https://www.wikidata.org/wiki/Q440370","display_name":"Mel-frequency cepstrum","level":3,"score":0.3578000068664551},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.3555000126361847},{"id":"https://openalex.org/C137584468","wikidata":"https://www.wikidata.org/wiki/Q35395","display_name":"Phonetics","level":2,"score":0.3305000066757202},{"id":"https://openalex.org/C155635449","wikidata":"https://www.wikidata.org/wiki/Q4674699","display_name":"Acoustic model","level":3,"score":0.30869999527931213},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.29789999127388},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.28459998965263367},{"id":"https://openalex.org/C91863865","wikidata":"https://www.wikidata.org/wiki/Q4349497","display_name":"Speech corpus","level":3,"score":0.28130000829696655},{"id":"https://openalex.org/C175202392","wikidata":"https://www.wikidata.org/wiki/Q2434543","display_name":"Time delay neural network","level":3,"score":0.26080000400543213},{"id":"https://openalex.org/C18555067","wikidata":"https://www.wikidata.org/wiki/Q8375051","display_name":"Joint (building)","level":2,"score":0.25850000977516174},{"id":"https://openalex.org/C13895895","wikidata":"https://www.wikidata.org/wiki/Q3270773","display_name":"Speech coding","level":2,"score":0.25699999928474426},{"id":"https://openalex.org/C2779227376","wikidata":"https://www.wikidata.org/wiki/Q6505497","display_name":"Layer (electronics)","level":2,"score":0.25429999828338623},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.25290000438690186}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.18653/v1/k17-1037","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/k17-1037","pdf_url":"https://www.aclweb.org/anthology/K17-1037.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 21st Conference on Computational Natural Language\n          Learning (CoNLL 2017)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1706.03815","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1706.03815","pdf_url":"https://arxiv.org/pdf/1706.03815","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:uvt:oai:tilburguniversity.edu:publications/4adf30c3-88d7-46dc-b41e-ea3048715f3c","is_oa":true,"landing_page_url":"https://research.tilburguniversity.edu/en/publications/4adf30c3-88d7-46dc-b41e-ea3048715f3c","pdf_url":null,"source":{"id":"https://openalex.org/S4306401843","display_name":"Data Archiving and Networked Services (DANS)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1322597698","host_organization_name":"Royal Netherlands Academy of Arts and Sciences","host_organization_lineage":["https://openalex.org/I1322597698"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Proceedings of the 21st Conference on Computational Natural Language Learning (CoNLL 2017), 368 - 378","raw_type":"info:eu-repo/semantics/conferencepaper"}],"best_oa_location":{"id":"doi:10.18653/v1/k17-1037","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/k17-1037","pdf_url":"https://www.aclweb.org/anthology/K17-1037.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 21st Conference on Computational Natural Language\n          Learning (CoNLL 2017)","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2625455707.pdf","grobid_xml":"https://content.openalex.org/works/W2625455707.grobid-xml"},"referenced_works_count":1,"referenced_works":["https://openalex.org/W3213502289"],"related_works":[],"abstract_inverted_index":{"We":[0,17,40,118],"study":[1],"the":[2,30,36,55,60,64,67,70,89,93,114,123,127,138,155],"representation":[3],"and":[4,24,28,32,63,77,136],"encoding":[5,133],"of":[6,14,44,66,69,92,108,134,150,161],"phonemes":[7,51,162],"in":[8,54,88,167],"a":[9,19,42,101,105,147],"recurrent":[10,116,129],"neural":[11],"network":[12,156],"model":[13,20],"grounded":[15],"speech.":[16],"use":[18],"which":[21],"processes":[22],"images":[23],"their":[25],"spoken":[26],"descriptions,":[27],"projects":[29],"visual":[31],"auditory":[33],"representations":[34,84,152],"into":[35],"same":[37],"semantic":[38],"space.":[39],"perform":[41],"number":[43],"analyses":[45],"on":[46],"how":[47],"information":[48,110],"about":[49],"individual":[50],"is":[52,111],"encoded":[53],"MFCC":[56],"features":[57],"extracted":[58],"from":[59],"speech":[61],"signal,":[62],"activations":[65],"layers":[68,91],"model.":[71],"Via":[72],"experiments":[73],"with":[74],"phoneme":[75,78,83,151],"decoding":[76],"discrimination":[79],"we":[80],"show":[81],"that":[82,122],"are":[85,98],"most":[86],"salient":[87],"lower":[90],"model,":[94],"where":[95],"low-level":[96],"signals":[97],"processed":[99],"at":[100,113],"fine-grained":[102],"level,":[103],"although":[104],"large":[106],"amount":[107],"phonological":[109],"retain":[112],"top":[115,128],"layer.":[117],"further":[119],"find":[120],"out":[121],"attention":[124],"mechanism":[125],"following":[126],"layer":[130],"significantly":[131],"attenuates":[132],"phonology":[135],"makes":[137],"utterance":[139],"embeddings":[140],"much":[141],"more":[142],"invariant":[143],"to":[144,164],"synonymy.":[145],"Moreover,":[146],"hierarchical":[148],"clustering":[149],"learned":[153],"by":[154],"shows":[157],"an":[158],"organizational":[159],"structure":[160],"similar":[163],"those":[165],"proposed":[166],"linguistics.":[168]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":6},{"year":2018,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2017-06-23T00:00:00"}
