{"id":"https://openalex.org/W4387838354","doi":"https://doi.org/10.48550/arxiv.2310.12371","title":"Property-Aware Multi-Speaker Data Simulation: A Probabilistic Modelling Technique for Synthetic Data Generation","display_name":"Property-Aware Multi-Speaker Data Simulation: A Probabilistic Modelling Technique for Synthetic Data Generation","publication_year":2023,"publication_date":"2023-10-18","ids":{"openalex":"https://openalex.org/W4387838354","doi":"https://doi.org/10.48550/arxiv.2310.12371"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2310.12371","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2310.12371","pdf_url":"https://arxiv.org/pdf/2310.12371","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2310.12371","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101397880","display_name":"Tae Jin Park","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Park, Tae Jin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100411301","display_name":"He Huang","orcid":"https://orcid.org/0000-0002-9217-4977"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, He","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007438902","display_name":"Coleman Hooper","orcid":"https://orcid.org/0000-0002-5890-610X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hooper, Coleman","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040007143","display_name":"Nithin Rao Koluguri","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Koluguri, Nithin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073181369","display_name":"Kunal Dhawan","orcid":"https://orcid.org/0000-0002-5276-2475"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dhawan, Kunal","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063860258","display_name":"Ante Juki\u0107","orcid":"https://orcid.org/0000-0003-2202-2203"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jukic, Ante","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040747392","display_name":"Jagadeesh Balam","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Balam, Jagadeesh","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5032957280","display_name":"Boris Ginsburg","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ginsburg, Boris","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5101397880"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speaker-diarisation","display_name":"Speaker diarisation","score":0.8938279151916504},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8111361861228943},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6635388135910034},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.6440668106079102},{"id":"https://openalex.org/keywords/statistical-model","display_name":"Statistical model","score":0.5638377070426941},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.5631365776062012},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4811486005783081},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.42584818601608276},{"id":"https://openalex.org/keywords/property","display_name":"Property (philosophy)","score":0.42535579204559326},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3408125042915344}],"concepts":[{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.8938279151916504},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8111361861228943},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6635388135910034},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.6440668106079102},{"id":"https://openalex.org/C114289077","wikidata":"https://www.wikidata.org/wiki/Q3284399","display_name":"Statistical model","level":2,"score":0.5638377070426941},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.5631365776062012},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4811486005783081},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.42584818601608276},{"id":"https://openalex.org/C189950617","wikidata":"https://www.wikidata.org/wiki/Q937228","display_name":"Property (philosophy)","level":2,"score":0.42535579204559326},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3408125042915344},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2310.12371","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2310.12371","pdf_url":"https://arxiv.org/pdf/2310.12371","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2310.12371","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2310.12371","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article-journal"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2310.12371","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2310.12371","pdf_url":"https://arxiv.org/pdf/2310.12371","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4387838354.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2206035908","https://openalex.org/W2162158162","https://openalex.org/W4247736853","https://openalex.org/W1493012537","https://openalex.org/W1999004162","https://openalex.org/W1992908141","https://openalex.org/W2175373321","https://openalex.org/W2125642021","https://openalex.org/W1521049138","https://openalex.org/W2938358845"],"abstract_inverted_index":{"We":[0,118],"introduce":[1],"a":[2,41,67,84],"sophisticated":[3],"multi-speaker":[4,12,72,98,123],"speech":[5,13,81],"data":[6,82],"simulator,":[7],"specifically":[8],"engineered":[9],"to":[10,24],"generate":[11],"recordings.":[14],"A":[15],"notable":[16],"feature":[17],"of":[18,28,35,59,80,147],"this":[19],"simulator":[20,99,124],"is":[21,83],"its":[22],"capacity":[23],"modulate":[25],"the":[26,33,75,115,121,135,145,161],"distribution":[27],"silence":[29],"and":[30,53,92,150],"overlap":[31],"via":[32],"adjustment":[34],"statistical":[36,110,129],"parameters.":[37,117],"This":[38],"capability":[39],"offers":[40],"tailored":[42],"training":[43,88],"environment":[44],"for":[45,50,62,87],"developing":[46],"neural":[47],"models":[48],"suited":[49],"speaker":[51,63,90,148],"diarization":[52,64,91,149],"voice":[54,93,151],"activity":[55,94,152],"detection.":[56,95],"The":[57],"acquisition":[58],"substantial":[60],"datasets":[61],"often":[65],"presents":[66],"significant":[68],"challenge,":[69],"particularly":[70],"in":[71],"scenarios.":[73],"Furthermore,":[74],"precise":[76],"time":[77],"stamp":[78],"annotation":[79],"critical":[85],"factor":[86],"both":[89],"Our":[96],"proposed":[97,122],"tackles":[100],"these":[101],"problems":[102],"by":[103],"generating":[104],"large-scale":[105],"audio":[106,126],"mixtures":[107,127],"that":[108,120,131],"maintain":[109],"properties":[111,130],"closely":[112,132],"aligned":[113],"with":[114,128,134],"input":[116,136],"demonstrate":[119],"generates":[125],"align":[133],"parameters":[137],"derived":[138],"from":[139],"real-world":[140],"statistics.":[141],"Additionally,":[142],"we":[143],"present":[144],"effectiveness":[146],"detection":[153],"models,":[154],"which":[155],"have":[156],"been":[157],"trained":[158],"exclusively":[159],"on":[160],"generated":[162],"simulated":[163],"datasets.":[164]},"counts_by_year":[],"updated_date":"2026-03-10T16:38:18.471706","created_date":"2025-10-10T00:00:00"}
