{"id":"https://openalex.org/W4375869060","doi":"https://doi.org/10.1109/icassp49357.2023.10096099","title":"Analyzing Acoustic Word Embeddings from Pre-Trained Self-Supervised Speech Models","display_name":"Analyzing Acoustic Word Embeddings from Pre-Trained Self-Supervised Speech Models","publication_year":2023,"publication_date":"2023-05-05","ids":{"openalex":"https://openalex.org/W4375869060","doi":"https://doi.org/10.1109/icassp49357.2023.10096099"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49357.2023.10096099","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10096099","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://www.research.ed.ac.uk/files/334513114/Analyzing_Acoustic_SANABRIA_DOA15022023_AFV_CC_BY.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5053704856","display_name":"Ramon Sanabria","orcid":null},"institutions":[{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Ramon Sanabria","raw_affiliation_strings":["The University of Edinburgh"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The University of Edinburgh","institution_ids":["https://openalex.org/I98677209"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100662187","display_name":"Hao Tang","orcid":"https://orcid.org/0000-0002-2445-2605"},"institutions":[{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Hao Tang","raw_affiliation_strings":["The University of Edinburgh"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The University of Edinburgh","institution_ids":["https://openalex.org/I98677209"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5075564798","display_name":"Sharon Goldwater","orcid":"https://orcid.org/0000-0002-7298-0947"},"institutions":[{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Sharon Goldwater","raw_affiliation_strings":["The University of Edinburgh"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The University of Edinburgh","institution_ids":["https://openalex.org/I98677209"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.7737,"has_fulltext":false,"cited_by_count":17,"citation_normalized_percentile":{"value":0.92105445,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pooling","display_name":"Pooling","score":0.8850311040878296},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7579572796821594},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6725119352340698},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.6635392904281616},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6063926219940186},{"id":"https://openalex.org/keywords/mandarin-chinese","display_name":"Mandarin Chinese","score":0.6062723994255066},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5215468406677246},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5065339803695679},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.19609573483467102}],"concepts":[{"id":"https://openalex.org/C70437156","wikidata":"https://www.wikidata.org/wiki/Q7228652","display_name":"Pooling","level":2,"score":0.8850311040878296},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7579572796821594},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6725119352340698},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.6635392904281616},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6063926219940186},{"id":"https://openalex.org/C138954614","wikidata":"https://www.wikidata.org/wiki/Q9192","display_name":"Mandarin Chinese","level":2,"score":0.6062723994255066},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5215468406677246},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5065339803695679},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.19609573483467102},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/icassp49357.2023.10096099","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10096099","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},{"id":"pmh:oai:pure.ed.ac.uk:openaire/12055782-f556-48d4-986f-2c7e824b9f77","is_oa":true,"landing_page_url":"https://www.research.ed.ac.uk/files/334513114/Analyzing_Acoustic_SANABRIA_DOA15022023_AFV_CC_BY.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306400321","display_name":"Edinburgh Research Explorer (University of Edinburgh)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I98677209","host_organization_name":"University of Edinburgh","host_organization_lineage":["https://openalex.org/I98677209"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Sanabria Teixidor, R, Tang, H & Goldwater, S 2023, Analyzing acoustic word embeddings from pre-trained self-supervised speech models. in 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). International Conference on Acoustics, Speech, and Signal Processing (ICASSP), Institute of Electrical and Electronics Engineers, 2023 IEEE International Conference on Acoustics, Speech and Signal Processing, Rhodes Island, Greece, 4/06/23. https://doi.org/10.1109/ICASSP49357.2023.10096099","raw_type":"contributionToPeriodical"},{"id":"pmh:oai:pure.ed.ac.uk:publications/12055782-f556-48d4-986f-2c7e824b9f77","is_oa":true,"landing_page_url":"https://www.research.ed.ac.uk/en/publications/12055782-f556-48d4-986f-2c7e824b9f77","pdf_url":null,"source":{"id":"https://openalex.org/S4306400321","display_name":"Edinburgh Research Explorer (University of Edinburgh)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I98677209","host_organization_name":"University of Edinburgh","host_organization_lineage":["https://openalex.org/I98677209"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Sanabria Teixidor, R, Tang, H & Goldwater, S 2023, Analyzing acoustic word embeddings from pre-trained self-supervised speech models. in 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). International Conference on Acoustics, Speech, and Signal Processing (ICASSP), Institute of Electrical and Electronics Engineers, 2023 IEEE International Conference on Acoustics, Speech and Signal Processing, Rhodes Island, Greece, 4/06/23. https://doi.org/10.1109/ICASSP49357.2023.10096099","raw_type":"contributionToPeriodical"}],"best_oa_location":{"id":"pmh:oai:pure.ed.ac.uk:openaire/12055782-f556-48d4-986f-2c7e824b9f77","is_oa":true,"landing_page_url":"https://www.research.ed.ac.uk/files/334513114/Analyzing_Acoustic_SANABRIA_DOA15022023_AFV_CC_BY.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306400321","display_name":"Edinburgh Research Explorer (University of Edinburgh)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I98677209","host_organization_name":"University of Edinburgh","host_organization_lineage":["https://openalex.org/I98677209"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Sanabria Teixidor, R, Tang, H & Goldwater, S 2023, Analyzing acoustic word embeddings from pre-trained self-supervised speech models. in 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). International Conference on Acoustics, Speech, and Signal Processing (ICASSP), Institute of Electrical and Electronics Engineers, 2023 IEEE International Conference on Acoustics, Speech and Signal Processing, Rhodes Island, Greece, 4/06/23. https://doi.org/10.1109/ICASSP49357.2023.10096099","raw_type":"contributionToPeriodical"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.5299999713897705,"display_name":"Reduced inequalities"},{"id":"https://metadata.un.org/sdg/16","score":0.4399999976158142,"display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":37,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W2004409624","https://openalex.org/W2044138293","https://openalex.org/W2057007397","https://openalex.org/W2059652594","https://openalex.org/W2097207027","https://openalex.org/W2126203737","https://openalex.org/W2145410271","https://openalex.org/W2190506272","https://openalex.org/W2407151108","https://openalex.org/W2468716020","https://openalex.org/W2584414011","https://openalex.org/W2842511635","https://openalex.org/W2889313720","https://openalex.org/W2962736743","https://openalex.org/W2962980711","https://openalex.org/W2963620343","https://openalex.org/W2963720603","https://openalex.org/W2964169922","https://openalex.org/W2995181338","https://openalex.org/W3036601975","https://openalex.org/W3095706145","https://openalex.org/W3098643042","https://openalex.org/W3110761489","https://openalex.org/W3146777637","https://openalex.org/W3150635893","https://openalex.org/W3209059054","https://openalex.org/W4223486244","https://openalex.org/W4226380987","https://openalex.org/W4287173589","https://openalex.org/W4297808394","https://openalex.org/W4319862670","https://openalex.org/W6732782712","https://openalex.org/W6780218876","https://openalex.org/W6786885278","https://openalex.org/W6795952400","https://openalex.org/W6849880362"],"related_works":["https://openalex.org/W2374317326","https://openalex.org/W2990005675","https://openalex.org/W1603321096","https://openalex.org/W2394766824","https://openalex.org/W2078713291","https://openalex.org/W2361574037","https://openalex.org/W2386292991","https://openalex.org/W2364440891","https://openalex.org/W2393726922","https://openalex.org/W2366752344"],"abstract_inverted_index":{"Given":[0],"the":[1,50,88,91,114],"strong":[2],"results":[3],"of":[4,53,90],"self-supervised":[5,17,46,54],"models":[6,38],"on":[7,74,93,102,107,124],"various":[8],"tasks,":[9],"there":[10],"have":[11],"been":[12],"surprisingly":[13],"few":[14],"studies":[15],"exploring":[16],"representations":[18,84,105],"for":[19,42,69],"acoustic":[20],"word":[21,29,77],"embeddings":[22],"(AWE),":[23],"fixed-dimensional":[24],"vectors":[25],"representing":[26],"variable-length":[27],"spoken":[28],"segments.":[30],"In":[31],"this":[32],"work,":[33],"we":[34,56,80],"study":[35],"several":[36],"pre-trained":[37],"and":[39,110],"pooling":[40,60],"methods":[41],"constructing":[43,70],"AWEs":[44],"with":[45,85],"representations.":[47],"Owing":[48],"to":[49],"contextualized":[51],"nature":[52],"representations,":[55],"hy-pothesize":[57],"that":[58,82],"simple":[59],"methods,":[61],"such":[62],"as":[63,120],"averaging,":[64],"might":[65],"already":[66],"be":[67],"useful":[68],"AWEs.":[71,95],"When":[72],"evaluating":[73],"a":[75],"standard":[76],"discrimination":[78],"task,":[79],"find":[81],"HuBERT":[83,104],"mean-pooling":[86],"rival":[87],"state":[89],"art":[92],"English":[94],"More":[96],"surprisingly,":[97],"despite":[98],"being":[99],"trained":[100,123],"only":[101],"English,":[103],"evaluated":[106],"Xitsonga,":[108],"Mandarin,":[109],"French":[111],"consistently":[112],"outperform":[113],"multilingual":[115],"model":[116],"XLSR-53":[117],"(as":[118],"well":[119],"Wav2Vec":[121],"2.0":[122],"English).":[125]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":5}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
