{"id":"https://openalex.org/W2477779855","doi":"https://doi.org/10.21437/interspeech.2016-945","title":"Speakers In The Wild (SITW): The QUT Speaker Recognition System","display_name":"Speakers In The Wild (SITW): The QUT Speaker Recognition System","publication_year":2016,"publication_date":"2016-08-28","ids":{"openalex":"https://openalex.org/W2477779855","doi":"https://doi.org/10.21437/interspeech.2016-945","mag":"2477779855"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2016-945","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2016-945","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2016","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://eprints.qut.edu.au/96310/29/Interspeech2016-SITW_paper.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5012849807","display_name":"Houman Ghaemmaghami","orcid":null},"institutions":[{"id":"https://openalex.org/I160993911","display_name":"Queensland University of Technology","ror":"https://ror.org/03pnv4752","country_code":"AU","type":"education","lineage":["https://openalex.org/I160993911"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"H. Ghaemmaghami","raw_affiliation_strings":["Speech and Audio Research Laboratory, Queensland University of Technology, Brisbane, Australia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Speech and Audio Research Laboratory, Queensland University of Technology, Brisbane, Australia","institution_ids":["https://openalex.org/I160993911"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100776664","display_name":"Md Hafizur Rahman","orcid":"https://orcid.org/0000-0001-6576-1554"},"institutions":[{"id":"https://openalex.org/I160993911","display_name":"Queensland University of Technology","ror":"https://ror.org/03pnv4752","country_code":"AU","type":"education","lineage":["https://openalex.org/I160993911"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"M.H. Rahman","raw_affiliation_strings":["Speech and Audio Research Laboratory, Queensland University of Technology, Brisbane, Australia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Speech and Audio Research Laboratory, Queensland University of Technology, Brisbane, Australia","institution_ids":["https://openalex.org/I160993911"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039529498","display_name":"Ivan Himawan","orcid":"https://orcid.org/0000-0003-3848-244X"},"institutions":[{"id":"https://openalex.org/I160993911","display_name":"Queensland University of Technology","ror":"https://ror.org/03pnv4752","country_code":"AU","type":"education","lineage":["https://openalex.org/I160993911"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Ivan Himawan","raw_affiliation_strings":["Speech and Audio Research Laboratory, Queensland University of Technology, Brisbane, Australia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Speech and Audio Research Laboratory, Queensland University of Technology, Brisbane, Australia","institution_ids":["https://openalex.org/I160993911"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000014182","display_name":"David Dean","orcid":"https://orcid.org/0000-0003-1274-9775"},"institutions":[{"id":"https://openalex.org/I160993911","display_name":"Queensland University of Technology","ror":"https://ror.org/03pnv4752","country_code":"AU","type":"education","lineage":["https://openalex.org/I160993911"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"David Dean","raw_affiliation_strings":["Speech and Audio Research Laboratory, Queensland University of Technology, Brisbane, Australia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Speech and Audio Research Laboratory, Queensland University of Technology, Brisbane, Australia","institution_ids":["https://openalex.org/I160993911"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023434638","display_name":"Ahilan Kanagasundaram","orcid":"https://orcid.org/0000-0002-0533-7986"},"institutions":[{"id":"https://openalex.org/I160993911","display_name":"Queensland University of Technology","ror":"https://ror.org/03pnv4752","country_code":"AU","type":"education","lineage":["https://openalex.org/I160993911"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Ahilan Kanagasundaram","raw_affiliation_strings":["Speech and Audio Research Laboratory, Queensland University of Technology, Brisbane, Australia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Speech and Audio Research Laboratory, Queensland University of Technology, Brisbane, Australia","institution_ids":["https://openalex.org/I160993911"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055128383","display_name":"Sridha Sridharan","orcid":"https://orcid.org/0000-0003-4316-9001"},"institutions":[{"id":"https://openalex.org/I160993911","display_name":"Queensland University of Technology","ror":"https://ror.org/03pnv4752","country_code":"AU","type":"education","lineage":["https://openalex.org/I160993911"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Sridha Sridharan","raw_affiliation_strings":["Speech and Audio Research Laboratory, Queensland University of Technology, Brisbane, Australia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Speech and Audio Research Laboratory, Queensland University of Technology, Brisbane, Australia","institution_ids":["https://openalex.org/I160993911"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5034095159","display_name":"Clinton Fookes","orcid":"https://orcid.org/0000-0002-8515-6324"},"institutions":[{"id":"https://openalex.org/I160993911","display_name":"Queensland University of Technology","ror":"https://ror.org/03pnv4752","country_code":"AU","type":"education","lineage":["https://openalex.org/I160993911"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Clinton Fookes","raw_affiliation_strings":["Speech and Audio Research Laboratory, Queensland University of Technology, Brisbane, Australia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Speech and Audio Research Laboratory, Queensland University of Technology, Brisbane, Australia","institution_ids":["https://openalex.org/I160993911"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I160993911"],"apc_list":null,"apc_paid":null,"fwci":0.433,"has_fulltext":true,"cited_by_count":5,"citation_normalized_percentile":{"value":0.78510407,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"838","last_page":"842"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8056319952011108},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.721428394317627},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.6565954685211182},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5708279013633728},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.537401556968689},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5220234990119934},{"id":"https://openalex.org/keywords/feature-vector","display_name":"Feature vector","score":0.49240243434906006},{"id":"https://openalex.org/keywords/speaker-diarisation","display_name":"Speaker diarisation","score":0.4689962863922119},{"id":"https://openalex.org/keywords/ranking","display_name":"Ranking (information retrieval)","score":0.43775027990341187},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.4342251121997833},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4336237907409668},{"id":"https://openalex.org/keywords/domain-adaptation","display_name":"Domain adaptation","score":0.4106200933456421},{"id":"https://openalex.org/keywords/mixture-model","display_name":"Mixture model","score":0.4105621874332428}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8056319952011108},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.721428394317627},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.6565954685211182},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5708279013633728},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.537401556968689},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5220234990119934},{"id":"https://openalex.org/C83665646","wikidata":"https://www.wikidata.org/wiki/Q42139305","display_name":"Feature vector","level":2,"score":0.49240243434906006},{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.4689962863922119},{"id":"https://openalex.org/C189430467","wikidata":"https://www.wikidata.org/wiki/Q7293293","display_name":"Ranking (information retrieval)","level":2,"score":0.43775027990341187},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.4342251121997833},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4336237907409668},{"id":"https://openalex.org/C2776434776","wikidata":"https://www.wikidata.org/wiki/Q19246213","display_name":"Domain adaptation","level":3,"score":0.4106200933456421},{"id":"https://openalex.org/C61224824","wikidata":"https://www.wikidata.org/wiki/Q2260434","display_name":"Mixture model","level":2,"score":0.4105621874332428},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.21437/interspeech.2016-945","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2016-945","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2016","raw_type":"proceedings-article"},{"id":"pmh:oai:eprints.qut.edu.au:96310","is_oa":true,"landing_page_url":"http://www.isca-speech.org/archive/Interspeech_2016/","pdf_url":"https://eprints.qut.edu.au/96310/29/Interspeech2016-SITW_paper.pdf","source":{"id":"https://openalex.org/S4306402607","display_name":"QUT ePrints (Queensland University of Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I160993911","host_organization_name":"Queensland University of Technology","host_organization_lineage":["https://openalex.org/I160993911"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Proceedings of the 17th Annual Conference of the International Speech Communication Association (ISCA)","raw_type":"Chapter in Book, Report or Conference volume"}],"best_oa_location":{"id":"pmh:oai:eprints.qut.edu.au:96310","is_oa":true,"landing_page_url":"http://www.isca-speech.org/archive/Interspeech_2016/","pdf_url":"https://eprints.qut.edu.au/96310/29/Interspeech2016-SITW_paper.pdf","source":{"id":"https://openalex.org/S4306402607","display_name":"QUT ePrints (Queensland University of Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I160993911","host_organization_name":"Queensland University of Technology","host_organization_lineage":["https://openalex.org/I160993911"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Proceedings of the 17th Annual Conference of the International Speech Communication Association (ISCA)","raw_type":"Chapter in Book, Report or Conference volume"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.5,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W2477779855.pdf"},"referenced_works_count":22,"referenced_works":["https://openalex.org/W37845981","https://openalex.org/W97072897","https://openalex.org/W866349134","https://openalex.org/W1480629789","https://openalex.org/W1524333225","https://openalex.org/W1532958172","https://openalex.org/W2015532879","https://openalex.org/W2039057510","https://openalex.org/W2117671523","https://openalex.org/W2129379984","https://openalex.org/W2136879537","https://openalex.org/W2150769028","https://openalex.org/W2173629880","https://openalex.org/W2184045248","https://openalex.org/W2290689761","https://openalex.org/W2395750323","https://openalex.org/W2398362606","https://openalex.org/W2406312423","https://openalex.org/W2407374891","https://openalex.org/W2516764878","https://openalex.org/W4231743502","https://openalex.org/W4234330420"],"related_works":["https://openalex.org/W2206035908","https://openalex.org/W1197719229","https://openalex.org/W4247736853","https://openalex.org/W2381158726","https://openalex.org/W2552734076","https://openalex.org/W2162158162","https://openalex.org/W2075383893","https://openalex.org/W106647055","https://openalex.org/W1992796048","https://openalex.org/W1493012537"],"abstract_inverted_index":{"This":[0,42],"paper":[1],"presents":[2],"the":[3,13,33,39,72,88,93,110,130,134,140,151,166,169,174,181,186],"QUT":[4],"speaker":[5,19,125,162,194],"recognition":[6,20,163,195],"system,":[7],"as":[8,99],"a":[9,53,81,101,191],"competing":[10],"system":[11,24,43,179],"in":[12,32,74,80,100,129,196],"Speakers":[14],"In":[15],"The":[16,63,144],"Wild":[17],"(SITW)":[18],"challenge.":[21,41],"Our":[22],"proposed":[23],"achieved":[25],"an":[26,45],"overall":[27],"ranking":[28],"of":[29,38,76,168,177,185],"second":[30],"place,":[31],"main":[34],"core-core":[35],"condition":[36,183],"evaluations":[37],"SITW":[40,187],"uses":[44],"ivector/":[46],"PLDA":[47,119],"approach,":[48],"with":[49,198],"domain":[50,107],"adaptation":[51,108],"and":[52,126,133,164,189],"deep":[54],"neural":[55],"network":[56],"(DNN)":[57],"trained":[58],"to":[59,109,113,123,149,159,180],"provide":[60],"feature":[61],"statistics.":[62],"statistics":[64,89],"are":[65,137,147,156],"accumulated":[66],"by":[67],"using":[68,139],"class":[69],"posteriors":[70,79],"from":[71],"DNN,":[73],"place":[75],"GMM":[77,83],"component":[78],"typical":[82],"UBM":[84],"i-vector/PLDA":[85],"system.":[86,104,170],"Once":[87],"have":[90],"been":[91],"collected,":[92],"i-vector":[94,131],"computation":[95],"is":[96,121],"carried":[97],"out":[98,161],"GMM-UBM":[102],"based":[103],"We":[105],"apply":[106],"extracted":[111],"i-vectors":[112,136],"ensure":[114],"robustness":[115],"against":[116],"dataset":[117],"variability,":[118],"modelling":[120],"used":[122,158],"capture":[124],"session":[127],"variability":[128],"space,":[132],"processed":[135],"compared":[138],"batch":[141],"likelihood":[142,153],"ratio.":[143],"final":[145],"scores":[146],"calibrated":[148,152],"obtain":[150],"scores,":[154],"which":[155],"then":[157],"carry":[160],"evaluate":[165],"performance":[167],"Finally,":[171],"we":[172],"explore":[173],"practical":[175],"application":[176],"our":[178],"core-multi":[182],"recordings":[184,197],"data":[188],"propose":[190],"technique":[192],"for":[193],"multiple":[199],"speakers.":[200]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":1},{"year":2018,"cited_by_count":1}],"updated_date":"2026-07-02T09:51:11.867554","created_date":"2025-10-10T00:00:00"}
