{"id":"https://openalex.org/W2989378323","doi":"https://doi.org/10.1109/icassp40776.2020.9053477","title":"Supervised Online Diarization with Sample Mean Loss for Multi-Domain Data","display_name":"Supervised Online Diarization with Sample Mean Loss for Multi-Domain Data","publication_year":2020,"publication_date":"2020-04-09","ids":{"openalex":"https://openalex.org/W2989378323","doi":"https://doi.org/10.1109/icassp40776.2020.9053477","mag":"2989378323"},"language":"en","primary_location":{"id":"doi:10.1109/icassp40776.2020.9053477","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp40776.2020.9053477","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1911.01266","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5066230537","display_name":"Enrico Fini","orcid":"https://orcid.org/0000-0002-1671-875X"},"institutions":[{"id":"https://openalex.org/I4210109975","display_name":"Provincia Autonoma di Trento","ror":"https://ror.org/017e99q89","country_code":"IT","type":"government","lineage":["https://openalex.org/I4210109975"]}],"countries":["IT"],"is_corresponding":true,"raw_author_name":"Enrico Fini","raw_affiliation_strings":["PerVoice Spa, Trento, Italy","PerVoice Spa,Trento,Italy"],"affiliations":[{"raw_affiliation_string":"PerVoice Spa, Trento, Italy","institution_ids":["https://openalex.org/I4210109975"]},{"raw_affiliation_string":"PerVoice Spa,Trento,Italy","institution_ids":["https://openalex.org/I4210109975"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5066363315","display_name":"Alessio Brutti","orcid":"https://orcid.org/0000-0003-4146-3071"},"institutions":[{"id":"https://openalex.org/I2277624104","display_name":"Fondazione Bruno Kessler","ror":"https://ror.org/01j33xk10","country_code":"IT","type":"facility","lineage":["https://openalex.org/I2277624104"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Alessio Brutti","raw_affiliation_strings":["Fondazione Bruno Kessler, Trento, Italy","Fondazione Bruno Kessler Trento Italy"],"affiliations":[{"raw_affiliation_string":"Fondazione Bruno Kessler, Trento, Italy","institution_ids":["https://openalex.org/I2277624104"]},{"raw_affiliation_string":"Fondazione Bruno Kessler Trento Italy","institution_ids":["https://openalex.org/I2277624104"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5066230537"],"corresponding_institution_ids":["https://openalex.org/I4210109975"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.00831558,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"7134","last_page":"7138"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8025850057601929},{"id":"https://openalex.org/keywords/speaker-diarisation","display_name":"Speaker diarisation","score":0.6637089252471924},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6564950346946716},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.5762031674385071},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5071151256561279},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.5036799311637878},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.4800363779067993},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4551997780799866},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4506424367427826},{"id":"https://openalex.org/keywords/conversation","display_name":"Conversation","score":0.4490278363227844},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.2728480100631714}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8025850057601929},{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.6637089252471924},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6564950346946716},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.5762031674385071},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5071151256561279},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.5036799311637878},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.4800363779067993},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4551997780799866},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4506424367427826},{"id":"https://openalex.org/C2777200299","wikidata":"https://www.wikidata.org/wiki/Q52943","display_name":"Conversation","level":2,"score":0.4490278363227844},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.2728480100631714},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/icassp40776.2020.9053477","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp40776.2020.9053477","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1911.01266","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1911.01266","pdf_url":"https://arxiv.org/pdf/1911.01266","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.1911.01266","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1911.01266","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"},{"id":"mag:2989378323","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":null}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1911.01266","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1911.01266","pdf_url":"https://arxiv.org/pdf/1911.01266","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.800000011920929,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2989378323.pdf","grobid_xml":"https://content.openalex.org/works/W2989378323.grobid-xml"},"referenced_works_count":31,"referenced_works":["https://openalex.org/W48568691","https://openalex.org/W67702164","https://openalex.org/W1524333225","https://openalex.org/W1965819578","https://openalex.org/W2046056978","https://openalex.org/W2081074144","https://openalex.org/W2136879537","https://openalex.org/W2138730338","https://openalex.org/W2150769028","https://openalex.org/W2154241802","https://openalex.org/W2157364932","https://openalex.org/W2159591770","https://openalex.org/W2615915281","https://openalex.org/W2638067502","https://openalex.org/W2746574320","https://openalex.org/W2786458517","https://openalex.org/W2805869973","https://openalex.org/W2808631503","https://openalex.org/W2889418727","https://openalex.org/W2889519245","https://openalex.org/W2896538040","https://openalex.org/W2963470929","https://openalex.org/W2963745998","https://openalex.org/W2972449503","https://openalex.org/W2972680151","https://openalex.org/W2972949456","https://openalex.org/W6601947329","https://openalex.org/W6602779673","https://openalex.org/W6631362777","https://openalex.org/W6684363390","https://openalex.org/W6748447238"],"related_works":["https://openalex.org/W2972949456","https://openalex.org/W3043471679","https://openalex.org/W2340557458","https://openalex.org/W3158287709","https://openalex.org/W3121793919","https://openalex.org/W2405970501","https://openalex.org/W2110783210","https://openalex.org/W2981473019","https://openalex.org/W202282565","https://openalex.org/W3015463733","https://openalex.org/W71947379","https://openalex.org/W3095129427","https://openalex.org/W3015778863","https://openalex.org/W3144086690","https://openalex.org/W2400584454","https://openalex.org/W3093864324","https://openalex.org/W3042722625","https://openalex.org/W2153342002","https://openalex.org/W1787748203","https://openalex.org/W2770992561"],"abstract_inverted_index":{"Recently,":[0],"a":[1,17,47,59,77],"fully":[2],"supervised":[3],"speaker":[4,64,79,101],"diarization":[5,41],"approach":[6,115],"was":[7],"proposed":[8,114],"(UIS-RNN)":[9],"which":[10],"models":[11],"speakers":[12],"using":[13,145],"multiple":[14],"instances":[15],"of":[16,62,76],"parameter-sharing":[18],"recurrent":[19],"neural":[20],"network.":[21],"In":[22,43,83],"this":[23],"paper":[24],"we":[25,45,51,57,85,111],"propose":[26],"qualitative":[27],"modifications":[28],"to":[29,72,131,139],"the":[30,35,39,63,74,81,98,117,122,132],"model":[31,89],"that":[32,87],"significantly":[33],"improve":[34],"learning":[36],"efficiency":[37],"and":[38,56,135],"overall":[40],"performance.":[42],"particular,":[44],"introduce":[46],"novel":[48],"loss":[49],"function,":[50],"called":[52],"Sample":[53],"Mean":[54],"Loss":[55],"present":[58],"better":[60],"modelling":[61],"turn":[65],"behaviour,":[66],"by":[67],"devising":[68],"an":[69,140],"analytical":[70],"expression":[71],"compute":[73],"probability":[75],"new":[78],"joining":[80],"conversation.":[82],"addition,":[84],"demonstrate":[86],"our":[88,113,125],"can":[90],"be":[91],"trained":[92],"on":[93,116],"fixed-length":[94],"speech":[95],"segments,":[96],"removing":[97],"need":[99],"for":[100],"change":[102],"information":[103],"in":[104,121],"inference.":[105],"Using":[106],"x-vectors":[107],"as":[108],"input":[109],"features,":[110],"evaluate":[112],"multi-domain":[118],"dataset":[119],"employed":[120],"DIHARD-II":[123],"challenge:":[124],"online":[126],"method":[127],"improves":[128],"with":[129],"respect":[130],"original":[133],"UIS-RNN":[134],"achieves":[136],"similar":[137],"performance":[138],"offline":[141],"agglomerative":[142],"clustering":[143],"baseline":[144],"PLDA":[146],"scoring.":[147]},"counts_by_year":[],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
