{"id":"https://openalex.org/W147600081","doi":"https://doi.org/10.21437/interspeech.2012-571","title":"Group sparsity for speaker identity discrimination in factorisation-based speech recognition","display_name":"Group sparsity for speaker identity discrimination in factorisation-based speech recognition","publication_year":2012,"publication_date":"2012-09-09","ids":{"openalex":"https://openalex.org/W147600081","doi":"https://doi.org/10.21437/interspeech.2012-571","mag":"147600081"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2012-571","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2012-571","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2012","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://hdl.handle.net/2066/101733","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5062394539","display_name":"Antti Hurmalainen","orcid":null},"institutions":[{"id":"https://openalex.org/I150589677","display_name":"Tampere University of Applied Sciences","ror":"https://ror.org/00bwtjf83","country_code":"FI","type":"education","lineage":["https://openalex.org/I150589677"]}],"countries":["FI"],"is_corresponding":true,"raw_author_name":"Antti Hurmalainen","raw_affiliation_strings":["[Tampere Univ. of Tech.]"],"affiliations":[{"raw_affiliation_string":"[Tampere Univ. of Tech.]","institution_ids":["https://openalex.org/I150589677"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023224767","display_name":"Rahim Saeidi","orcid":"https://orcid.org/0000-0002-9084-0091"},"institutions":[{"id":"https://openalex.org/I145872427","display_name":"Radboud University Nijmegen","ror":"https://ror.org/016xsfp80","country_code":"NL","type":"education","lineage":["https://openalex.org/I145872427"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Rahim Saeidi","raw_affiliation_strings":["Radboud University Nijmegen#TAB#"],"affiliations":[{"raw_affiliation_string":"Radboud University Nijmegen#TAB#","institution_ids":["https://openalex.org/I145872427"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5049691461","display_name":"Tuomas Virtanen","orcid":"https://orcid.org/0000-0002-4604-9729"},"institutions":[{"id":"https://openalex.org/I150589677","display_name":"Tampere University of Applied Sciences","ror":"https://ror.org/00bwtjf83","country_code":"FI","type":"education","lineage":["https://openalex.org/I150589677"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Tuomas Virtanen","raw_affiliation_strings":["[Tampere Univ. of Tech.]"],"affiliations":[{"raw_affiliation_string":"[Tampere Univ. of Tech.]","institution_ids":["https://openalex.org/I150589677"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5062394539"],"corresponding_institution_ids":["https://openalex.org/I150589677"],"apc_list":null,"apc_paid":null,"fwci":3.2193,"has_fulltext":true,"cited_by_count":26,"citation_normalized_percentile":{"value":0.93858596,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"2138","last_page":"2141"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11447","display_name":"Blind Source Separation Techniques","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.8870609998703003},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7512509226799011},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6832174062728882},{"id":"https://openalex.org/keywords/identity","display_name":"Identity (music)","score":0.6729425191879272},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.6476876735687256},{"id":"https://openalex.org/keywords/non-negative-matrix-factorization","display_name":"Non-negative matrix factorization","score":0.5869680047035217},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.5625104904174805},{"id":"https://openalex.org/keywords/signal","display_name":"SIGNAL (programming language)","score":0.49797940254211426},{"id":"https://openalex.org/keywords/speaker-identification","display_name":"Speaker identification","score":0.4768594205379486},{"id":"https://openalex.org/keywords/source-separation","display_name":"Source separation","score":0.4762292504310608},{"id":"https://openalex.org/keywords/factorization","display_name":"Factorization","score":0.47213155031204224},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.4572948217391968},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4397461712360382},{"id":"https://openalex.org/keywords/speaker-diarisation","display_name":"Speaker diarisation","score":0.430226594209671},{"id":"https://openalex.org/keywords/group","display_name":"Group (periodic table)","score":0.419790118932724},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3646916151046753},{"id":"https://openalex.org/keywords/matrix-decomposition","display_name":"Matrix decomposition","score":0.24952563643455505},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.19114965200424194},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.17025226354599},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.0661625862121582},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.06364098191261292}],"concepts":[{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.8870609998703003},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7512509226799011},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6832174062728882},{"id":"https://openalex.org/C2778355321","wikidata":"https://www.wikidata.org/wiki/Q17079427","display_name":"Identity (music)","level":2,"score":0.6729425191879272},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.6476876735687256},{"id":"https://openalex.org/C152671427","wikidata":"https://www.wikidata.org/wiki/Q10843505","display_name":"Non-negative matrix factorization","level":4,"score":0.5869680047035217},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.5625104904174805},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.49797940254211426},{"id":"https://openalex.org/C2986627078","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker identification","level":3,"score":0.4768594205379486},{"id":"https://openalex.org/C2776864781","wikidata":"https://www.wikidata.org/wiki/Q52617913","display_name":"Source separation","level":2,"score":0.4762292504310608},{"id":"https://openalex.org/C187834632","wikidata":"https://www.wikidata.org/wiki/Q188804","display_name":"Factorization","level":2,"score":0.47213155031204224},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.4572948217391968},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4397461712360382},{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.430226594209671},{"id":"https://openalex.org/C2781311116","wikidata":"https://www.wikidata.org/wiki/Q83306","display_name":"Group (periodic table)","level":2,"score":0.419790118932724},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3646916151046753},{"id":"https://openalex.org/C42355184","wikidata":"https://www.wikidata.org/wiki/Q1361088","display_name":"Matrix decomposition","level":3,"score":0.24952563643455505},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.19114965200424194},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.17025226354599},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0661625862121582},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.06364098191261292},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C158693339","wikidata":"https://www.wikidata.org/wiki/Q190524","display_name":"Eigenvalues and eigenvectors","level":2,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.21437/interspeech.2012-571","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2012-571","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2012","raw_type":"proceedings-article"},{"id":"pmh:oai:repository.ubn.ru.nl:2066/101733","is_oa":true,"landing_page_url":"http://hdl.handle.net/2066/101733","pdf_url":"http://hdl.handle.net/2066/101733","source":{"id":"https://openalex.org/S4306401067","display_name":"Radboud Repository (Radboud University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I145872427","host_organization_name":"Radboud University Nijmegen","host_organization_lineage":["https://openalex.org/I145872427"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Proceedings of Interspeech 2012, pp. dvd","raw_type":"Article in monograph or in proceedings"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.720.8542","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.720.8542","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.cs.tut.fi/sgn/arg/music/tuomasv/hurmalainen_interspeech2012.pdf","raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:repository.ubn.ru.nl:2066/101733","is_oa":true,"landing_page_url":"http://hdl.handle.net/2066/101733","pdf_url":"http://hdl.handle.net/2066/101733","source":{"id":"https://openalex.org/S4306401067","display_name":"Radboud Repository (Radboud University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I145872427","host_organization_name":"Radboud University Nijmegen","host_organization_lineage":["https://openalex.org/I145872427"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Proceedings of Interspeech 2012, pp. dvd","raw_type":"Article in monograph or in proceedings"},"sustainable_development_goals":[{"score":0.6000000238418579,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W147600081.pdf","grobid_xml":"https://content.openalex.org/works/W147600081.grobid-xml"},"referenced_works_count":13,"referenced_works":["https://openalex.org/W2022257069","https://openalex.org/W2027717478","https://openalex.org/W2106398669","https://openalex.org/W2113154266","https://openalex.org/W2124149378","https://openalex.org/W2132972617","https://openalex.org/W2141520175","https://openalex.org/W2148577181","https://openalex.org/W2150008106","https://openalex.org/W2157639439","https://openalex.org/W2168076624","https://openalex.org/W2336741987","https://openalex.org/W2399491141"],"related_works":["https://openalex.org/W2551137307","https://openalex.org/W2113526703","https://openalex.org/W2123043102","https://openalex.org/W2577807713","https://openalex.org/W3094316140","https://openalex.org/W3133205200","https://openalex.org/W2547262076","https://openalex.org/W2098101267","https://openalex.org/W2563421448","https://openalex.org/W2898145319"],"abstract_inverted_index":{"Spectrogram":[0],"factorisation":[1],"using":[2,75,143],"a":[3,14,30,60,68,107],"dictionary":[4],"of":[5,36,47,63,82,90,111,129,135],"spectro-temporal":[6],"atoms":[7,23,38,64,112],"has":[8],"been":[9],"successfully":[10],"employed":[11],"to":[12,78,86,99,114],"separate":[13],"mixed":[15],"audio":[16],"signal":[17],"into":[18],"its":[19],"source":[20],"components.":[21],"When":[22],"from":[24,94],"multiple":[25,95],"sources":[26,41],"are":[27,65],"included":[28],"in":[29],"combined":[31],"dictionary,":[32],"the":[33,45,53,88,101,115,122,127,133],"relative":[34],"weights":[35,55],"activated":[37],"reveal":[39],"likely":[40,117],"as":[42,44],"well":[43],"content":[46,103],"each":[48],"source.":[49],"Enforcing":[50],"sparsity":[51,77,131],"on":[52,121],"activation":[54,81],"produces":[56],"solutions,":[57],"where":[58],"only":[59],"small":[61],"number":[62],"active":[66],"at":[67],"time.":[69],"In":[70],"this":[71],"paper":[72],"we":[73],"pro-pose":[74],"group":[76,130,148],"restrict":[79],"simultaneous":[80],"sources,":[83],"allowing":[84],"us":[85],"discover":[87],"identity":[89],"an":[91],"unknown":[92],"speaker":[93,138,152],"candidates,":[96],"and":[97,140],"further":[98],"recognise":[100],"phonetic":[102],"more":[104],"reliably":[105],"with":[106],"narrowed":[108],"down":[109],"subset":[110],"belonging":[113],"most":[116],"speakers.":[118],"An":[119],"evalua-tion":[120],"CHiME":[123],"corpus":[124],"shows":[125],"that":[126],"use":[128],"improves":[132],"results":[134],"noise":[136],"robust":[137],"identification":[139],"speech":[141,150],"recognition":[142],"speaker-dependent":[144],"models.":[145],"Index":[146],"Terms:":[147],"sparsity,":[149],"recognition,":[151],"iden-tification,":[153],"spectrogram":[154],"factorization":[155]},"counts_by_year":[{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":3},{"year":2016,"cited_by_count":6},{"year":2015,"cited_by_count":5},{"year":2014,"cited_by_count":3},{"year":2013,"cited_by_count":5}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
