{"id":"https://openalex.org/W2983398075","doi":"https://doi.org/10.1109/icassp40776.2020.9053373","title":"Improved Large-Margin Softmax Loss for Speaker Diarisation","display_name":"Improved Large-Margin Softmax Loss for Speaker Diarisation","publication_year":2020,"publication_date":"2020-04-09","ids":{"openalex":"https://openalex.org/W2983398075","doi":"https://doi.org/10.1109/icassp40776.2020.9053373","mag":"2983398075"},"language":"en","primary_location":{"id":"doi:10.1109/icassp40776.2020.9053373","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp40776.2020.9053373","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1911.03970","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5006814826","display_name":"Yassir Fathullah","orcid":null},"institutions":[{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Y. Fathullah","raw_affiliation_strings":["Engineering Department, Cambridge University, UK"],"affiliations":[{"raw_affiliation_string":"Engineering Department, Cambridge University, UK","institution_ids":["https://openalex.org/I241749"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007989522","display_name":"C. Zhang","orcid":"https://orcid.org/0000-0002-2173-6872"},"institutions":[{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"C. Zhang","raw_affiliation_strings":["Engineering Department, Cambridge University, UK"],"affiliations":[{"raw_affiliation_string":"Engineering Department, Cambridge University, UK","institution_ids":["https://openalex.org/I241749"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5002191410","display_name":"Philip C. Woodland","orcid":"https://orcid.org/0000-0001-9069-0225"},"institutions":[{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"P. C. Woodland","raw_affiliation_strings":["Engineering Department, Cambridge University, UK"],"affiliations":[{"raw_affiliation_string":"Engineering Department, Cambridge University, UK","institution_ids":["https://openalex.org/I241749"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5006814826"],"corresponding_institution_ids":["https://openalex.org/I241749"],"apc_list":null,"apc_paid":null,"fwci":1.3713,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.8481093,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"7104","last_page":"7108"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/softmax-function","display_name":"Softmax function","score":0.9560704827308655},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.7898867130279541},{"id":"https://openalex.org/keywords/margin","display_name":"Margin (machine learning)","score":0.7790814638137817},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.698106586933136},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6595115661621094},{"id":"https://openalex.org/keywords/reduction","display_name":"Reduction (mathematics)","score":0.49597224593162537},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.4759773313999176},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.47159990668296814},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.44656848907470703},{"id":"https://openalex.org/keywords/speech-enhancement","display_name":"Speech enhancement","score":0.4123395085334778},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.38910144567489624},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.22968772053718567},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.22313827276229858},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.19776850938796997},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.17289254069328308}],"concepts":[{"id":"https://openalex.org/C188441871","wikidata":"https://www.wikidata.org/wiki/Q7554146","display_name":"Softmax function","level":3,"score":0.9560704827308655},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.7898867130279541},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.7790814638137817},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.698106586933136},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6595115661621094},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.49597224593162537},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.4759773313999176},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.47159990668296814},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44656848907470703},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.4123395085334778},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.38910144567489624},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.22968772053718567},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.22313827276229858},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.19776850938796997},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.17289254069328308},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/icassp40776.2020.9053373","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp40776.2020.9053373","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1911.03970","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1911.03970","pdf_url":"https://arxiv.org/pdf/1911.03970","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1911.03970","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1911.03970","pdf_url":"https://arxiv.org/pdf/1911.03970","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"score":0.7300000190734863,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":37,"referenced_works":["https://openalex.org/W1528954144","https://openalex.org/W2038101708","https://openalex.org/W2046056978","https://openalex.org/W2081074144","https://openalex.org/W2114925438","https://openalex.org/W2125336414","https://openalex.org/W2148613904","https://openalex.org/W2159591770","https://openalex.org/W2405970501","https://openalex.org/W2520774990","https://openalex.org/W2612434969","https://openalex.org/W2638067502","https://openalex.org/W2743167696","https://openalex.org/W2747238065","https://openalex.org/W2889519245","https://openalex.org/W2890964092","https://openalex.org/W2896538040","https://openalex.org/W2932691488","https://openalex.org/W2938372602","https://openalex.org/W2940070181","https://openalex.org/W2962898354","https://openalex.org/W2963224870","https://openalex.org/W2963350250","https://openalex.org/W2963466847","https://openalex.org/W2963470929","https://openalex.org/W2963656735","https://openalex.org/W2963962398","https://openalex.org/W2969985801","https://openalex.org/W2972986505","https://openalex.org/W6662018943","https://openalex.org/W6678809451","https://openalex.org/W6713679436","https://openalex.org/W6726946684","https://openalex.org/W6735013348","https://openalex.org/W6737575990","https://openalex.org/W6742972455","https://openalex.org/W6751593755"],"related_works":["https://openalex.org/W3107204728","https://openalex.org/W4287591324","https://openalex.org/W4226420367","https://openalex.org/W2980176872","https://openalex.org/W2962876041","https://openalex.org/W3095152779","https://openalex.org/W3119773509","https://openalex.org/W3128220219","https://openalex.org/W2162582511","https://openalex.org/W2401089611"],"abstract_inverted_index":{"Speaker":[0],"diarisation":[1],"systems":[2],"nowadays":[3],"use":[4,39,121],"embeddings":[5,68],"generated":[6],"from":[7],"speech":[8,106,166],"segments":[9],"in":[10,40,140],"a":[11,51,72,141,150,178],"bottleneck":[12],"layer,":[13],"which":[14,148],"are":[15,98],"needed":[16],"to":[17,34,54,62,77,88,100,183],"be":[18],"discriminative":[19,110],"for":[20,69],"unseen":[21,35],"speakers.":[22],"It":[23],"is":[24,83,85],"well-known":[25],"that":[26,119],"large-margin":[27,56,81,123],"training":[28,96,161],"can":[29],"improve":[30,63],"the":[31,55,64,90,102,114,120,127,138,157,171,184],"generalisation":[32],"ability":[33],"data,":[36],"and":[37,74,163],"its":[38],"such":[41],"open-set":[42],"problems":[43],"has":[44,107],"been":[45],"widespread.":[46],"Therefore,":[47],"this":[48],"paper":[49],"introduces":[50],"general":[52],"approach":[53],"softmax":[57,82,124],"loss":[58,139],"without":[59],"any":[60],"approximations":[61],"quality":[65],"of":[66,92,122,137,154],"speaker":[67,128,165],"diarisation.":[70],"Furthermore,":[71],"novel":[73],"simple":[75],"way":[76],"stabilise":[78],"training,":[79],"when":[80],"used,":[84],"proposed.":[86],"Finally,":[87],"combat":[89],"effect":[91,104],"overlapping":[93,105,162],"speech,":[94],"different":[95,169],"margins":[97],"used":[99],"reduce":[101],"negative":[103],"on":[108,113],"creating":[109],"embeddings.":[111],"Experiments":[112],"AMI":[115],"meeting":[116],"corpus":[117],"show":[118],"significantly":[125],"improves":[126],"error":[129],"rate":[130],"(SER).":[131],"By":[132],"using":[133],"all":[134],"hyper":[135],"parameters":[136],"unified":[142],"way,":[143],"further":[144],"improvements":[145],"were":[146],"achieved":[147],"reached":[149],"relative":[151,182],"SER":[152,180],"reduction":[153,181],"24.6%":[155],"over":[156],"baseline.":[158,185],"However,":[159],"by":[160],"single":[164],"samples":[167],"with":[168],"margins,":[170],"best":[172],"result":[173],"was":[174],"achieved,":[175],"giving":[176],"overall":[177],"29.5%":[179]},"counts_by_year":[{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":7}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
