{"id":"https://openalex.org/W4392909481","doi":"https://doi.org/10.1109/icassp48485.2024.10446213","title":"Improving Neural Diarization through Speaker Attribute Attractors and Local Dependency Modeling","display_name":"Improving Neural Diarization through Speaker Attribute Attractors and Local Dependency Modeling","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392909481","doi":"https://doi.org/10.1109/icassp48485.2024.10446213"},"language":"en","primary_location":{"id":"doi:10.1109/icassp48485.2024.10446213","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10446213","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2506.05593","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5054684294","display_name":"David Palzer","orcid":null},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"David Palzer","raw_affiliation_strings":["The Ohio State University,Computer Science and Engineering","Computer Science and Engineering, The Ohio State University"],"affiliations":[{"raw_affiliation_string":"The Ohio State University,Computer Science and Engineering","institution_ids":["https://openalex.org/I52357470"]},{"raw_affiliation_string":"Computer Science and Engineering, The Ohio State University","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054649622","display_name":"Matthew Maciejewski","orcid":"https://orcid.org/0000-0002-6336-2135"},"institutions":[{"id":"https://openalex.org/I145311948","display_name":"Johns Hopkins University","ror":"https://ror.org/00za53h95","country_code":"US","type":"education","lineage":["https://openalex.org/I145311948"]},{"id":"https://openalex.org/I2802946424","display_name":"Johns Hopkins University Applied Physics Laboratory","ror":"https://ror.org/029pp9z10","country_code":"US","type":"facility","lineage":["https://openalex.org/I145311948","https://openalex.org/I2802946424"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Matthew Maciejewski","raw_affiliation_strings":["The Johns Hopkins University,Human Language Technology Center of Excellence","Human Language Technology Center of Excellence, The Johns Hopkins University"],"affiliations":[{"raw_affiliation_string":"The Johns Hopkins University,Human Language Technology Center of Excellence","institution_ids":["https://openalex.org/I2802946424","https://openalex.org/I145311948"]},{"raw_affiliation_string":"Human Language Technology Center of Excellence, The Johns Hopkins University","institution_ids":["https://openalex.org/I145311948"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5056667180","display_name":"Eric Fosler\u2010Lussier","orcid":"https://orcid.org/0000-0001-8004-5169"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Eric Fosler-Lussier","raw_affiliation_strings":["The Ohio State University,Computer Science and Engineering","Computer Science and Engineering, The Ohio State University"],"affiliations":[{"raw_affiliation_string":"The Ohio State University,Computer Science and Engineering","institution_ids":["https://openalex.org/I52357470"]},{"raw_affiliation_string":"Computer Science and Engineering, The Ohio State University","institution_ids":["https://openalex.org/I52357470"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5054684294"],"corresponding_institution_ids":["https://openalex.org/I52357470"],"apc_list":null,"apc_paid":null,"fwci":0.7274,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.73115262,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"11911","last_page":"11915"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9160000085830688,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9160000085830688,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speaker-diarisation","display_name":"Speaker diarisation","score":0.726482093334198},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7091507315635681},{"id":"https://openalex.org/keywords/dependency","display_name":"Dependency (UML)","score":0.6821208000183105},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.49190616607666016},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4515843987464905},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3871939778327942},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.31522172689437866}],"concepts":[{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.726482093334198},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7091507315635681},{"id":"https://openalex.org/C19768560","wikidata":"https://www.wikidata.org/wiki/Q320727","display_name":"Dependency (UML)","level":2,"score":0.6821208000183105},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.49190616607666016},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4515843987464905},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3871939778327942},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.31522172689437866}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/icassp48485.2024.10446213","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10446213","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2506.05593","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2506.05593","pdf_url":"https://arxiv.org/pdf/2506.05593","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2506.05593","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2506.05593","pdf_url":"https://arxiv.org/pdf/2506.05593","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320310145","display_name":"Johns Hopkins University","ror":"https://ror.org/00za53h95"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W1485783873","https://openalex.org/W1965819578","https://openalex.org/W2038101708","https://openalex.org/W2150769028","https://openalex.org/W2159591770","https://openalex.org/W2219249508","https://openalex.org/W2460742184","https://openalex.org/W2558649592","https://openalex.org/W2638067502","https://openalex.org/W2735663686","https://openalex.org/W2891054259","https://openalex.org/W2900440209","https://openalex.org/W2952752702","https://openalex.org/W2963470929","https://openalex.org/W2972492143","https://openalex.org/W2972767900","https://openalex.org/W2972949456","https://openalex.org/W3008357631","https://openalex.org/W3010196324","https://openalex.org/W3097777922","https://openalex.org/W3124972797","https://openalex.org/W4220731890","https://openalex.org/W4297841362","https://openalex.org/W4372349651","https://openalex.org/W6738902873"],"related_works":["https://openalex.org/W2206035908","https://openalex.org/W4297807400","https://openalex.org/W1491159402","https://openalex.org/W2144208207","https://openalex.org/W4389984014","https://openalex.org/W1813780412","https://openalex.org/W1509309911","https://openalex.org/W1940231550","https://openalex.org/W2118860825","https://openalex.org/W2096510939"],"abstract_inverted_index":{"In":[0,48],"recent":[1],"years,":[2],"end-to-end":[3],"approaches":[4],"have":[5],"made":[6],"notable":[7],"progress":[8],"in":[9,22],"addressing":[10],"the":[11,44,53,81,101],"challenge":[12],"of":[13,75],"speaker":[14,37,60],"diarization,":[15],"which":[16],"involves":[17],"segmenting":[18],"and":[19,62],"identifying":[20],"speakers":[21],"multi-talker":[23],"recordings.":[24],"One":[25],"such":[26],"approach,":[27],"Encoder-Decoder":[28],"Attractors":[29],"(EDA),":[30],"has":[31],"been":[32],"proposed":[33],"to":[34,91],"handle":[35],"variable":[36],"counts":[38],"as":[39,41],"well":[40],"better":[42],"guide":[43],"network":[45],"during":[46],"training.":[47],"this":[49],"study,":[50],"we":[51,79],"extend":[52],"attractor":[54],"paradigm":[55],"by":[56,83],"moving":[57],"beyond":[58],"direct":[59],"modeling":[61],"instead":[63],"focus":[64],"on":[65,100],"representing":[66],"more":[67],"detailed":[68],"\u2018speaker":[69],"attributes\u2019":[70],"through":[71],"a":[72,88],"multi-stage":[73],"process":[74],"intermediate":[76],"representations.":[77],"Additionally,":[78],"enhance":[80],"architecture":[82],"replacing":[84],"transformers":[85],"with":[86],"conformers,":[87],"convolution-augmented":[89],"transformer,":[90],"model":[92],"local":[93],"dependencies.":[94],"Experiments":[95],"demonstrate":[96],"improved":[97],"diarization":[98],"performance":[99],"CALLHOME":[102],"dataset.":[103]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
