{"id":"https://openalex.org/W4406461862","doi":"https://doi.org/10.1109/slt61566.2024.10832216","title":"FGCL: Fine-Grained Contrastive Learning For Mandarin Stuttering Event Detection","display_name":"FGCL: Fine-Grained Contrastive Learning For Mandarin Stuttering Event Detection","publication_year":2024,"publication_date":"2024-12-02","ids":{"openalex":"https://openalex.org/W4406461862","doi":"https://doi.org/10.1109/slt61566.2024.10832216"},"language":"en","primary_location":{"id":"doi:10.1109/slt61566.2024.10832216","is_oa":false,"landing_page_url":"https://doi.org/10.1109/slt61566.2024.10832216","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE Spoken Language Technology Workshop (SLT)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103729490","display_name":"Han Jiang","orcid":null},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Han Jiang","raw_affiliation_strings":["Xi&#x2019;an Jiaotong University,School of Software Engineering,Xi&#x2019;an,China"],"affiliations":[{"raw_affiliation_string":"Xi&#x2019;an Jiaotong University,School of Software Engineering,Xi&#x2019;an,China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108047245","display_name":"Wenyu Wang","orcid":"https://orcid.org/0000-0003-3542-4420"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenyu Wang","raw_affiliation_strings":["Xi&#x2019;an Jiaotong University,School of Software Engineering,Xi&#x2019;an,China"],"affiliations":[{"raw_affiliation_string":"Xi&#x2019;an Jiaotong University,School of Software Engineering,Xi&#x2019;an,China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111363141","display_name":"Yiquan Zhou","orcid":"https://orcid.org/0009-0002-1453-3820"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yiquan Zhou","raw_affiliation_strings":["Xi&#x2019;an Jiaotong University,School of Software Engineering,Xi&#x2019;an,China"],"affiliations":[{"raw_affiliation_string":"Xi&#x2019;an Jiaotong University,School of Software Engineering,Xi&#x2019;an,China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113430879","display_name":"Hongwu Ding","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hongwu Ding","raw_affiliation_strings":["Happy Elements,Shanghai,China"],"affiliations":[{"raw_affiliation_string":"Happy Elements,Shanghai,China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100622261","display_name":"Jiacheng Xu","orcid":"https://orcid.org/0000-0002-9292-6999"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiacheng Xu","raw_affiliation_strings":["Happy Elements,Shanghai,China"],"affiliations":[{"raw_affiliation_string":"Happy Elements,Shanghai,China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5068185614","display_name":"Jihua Zhu","orcid":"https://orcid.org/0000-0002-3081-8781"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jihua Zhu","raw_affiliation_strings":["Xi&#x2019;an Jiaotong University,School of Software Engineering,Xi&#x2019;an,China"],"affiliations":[{"raw_affiliation_string":"Xi&#x2019;an Jiaotong University,School of Software Engineering,Xi&#x2019;an,China","institution_ids":["https://openalex.org/I87445476"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5103729490"],"corresponding_institution_ids":["https://openalex.org/I87445476"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.23181158,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"379","last_page":"384"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9886000156402588,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9886000156402588,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12684","display_name":"Stuttering Research and Treatment","score":0.9833999872207642,"subfield":{"id":"https://openalex.org/subfields/3203","display_name":"Clinical Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10403","display_name":"Phonetics and Phonology Research","score":0.9695000052452087,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/mandarin-chinese","display_name":"Mandarin Chinese","score":0.8815017342567444},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.755957841873169},{"id":"https://openalex.org/keywords/stuttering","display_name":"Stuttering","score":0.5839475393295288},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.545977771282196},{"id":"https://openalex.org/keywords/event","display_name":"Event (particle physics)","score":0.5080480575561523},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.49959421157836914},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.466633141040802},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.3122510313987732}],"concepts":[{"id":"https://openalex.org/C138954614","wikidata":"https://www.wikidata.org/wiki/Q9192","display_name":"Mandarin Chinese","level":2,"score":0.8815017342567444},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.755957841873169},{"id":"https://openalex.org/C2781371259","wikidata":"https://www.wikidata.org/wiki/Q186676","display_name":"Stuttering","level":2,"score":0.5839475393295288},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.545977771282196},{"id":"https://openalex.org/C2779662365","wikidata":"https://www.wikidata.org/wiki/Q5416694","display_name":"Event (particle physics)","level":2,"score":0.5080480575561523},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.49959421157836914},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.466633141040802},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.3122510313987732},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/slt61566.2024.10832216","is_oa":false,"landing_page_url":"https://doi.org/10.1109/slt61566.2024.10832216","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE Spoken Language Technology Workshop (SLT)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W1545884232","https://openalex.org/W2141201928","https://openalex.org/W2794511405","https://openalex.org/W3035524453","https://openalex.org/W3036601975","https://openalex.org/W3097777922","https://openalex.org/W3161639065","https://openalex.org/W3174421047","https://openalex.org/W3176579494","https://openalex.org/W3209059054","https://openalex.org/W4224932945","https://openalex.org/W4281909602","https://openalex.org/W4283811635","https://openalex.org/W4283814553","https://openalex.org/W4287662461","https://openalex.org/W4372337800","https://openalex.org/W4390384763","https://openalex.org/W4402111964","https://openalex.org/W6632301973","https://openalex.org/W6774314701","https://openalex.org/W6780218876","https://openalex.org/W6784262730"],"related_works":["https://openalex.org/W2348937403","https://openalex.org/W2088087094","https://openalex.org/W2606084965","https://openalex.org/W604959964","https://openalex.org/W2043322848","https://openalex.org/W1483627603","https://openalex.org/W2103044274","https://openalex.org/W2163543546","https://openalex.org/W2036832189","https://openalex.org/W1877441159"],"abstract_inverted_index":{"This":[0],"paper":[1],"presents":[2],"the":[3,8,37,60,71,98,108,123],"T031":[4],"team\u2019s":[5],"approach":[6],"to":[7,19,35,82,96],"StutteringSpeech":[9],"Challenge":[10],"in":[11,25,134,147,156],"SLT2024.":[12],"Mandarin":[13,26,120,138,148],"Stuttering":[14,48],"Event":[15,49],"Detection":[16,50],"(MSED)":[17],"aims":[18],"detect":[20],"instances":[21],"of":[22,39,74,111,125,131],"stuttering":[23,75,149],"events":[24,76],"speech.":[27],"We":[28],"propose":[29,91],"a":[30,79,92,128],"detailed":[31],"acoustic":[32],"analysis":[33],"method":[34],"improve":[36],"accuracy":[38],"stutter":[40,93],"detection":[41,151],"by":[42],"capturing":[43],"subtle":[44],"nuances":[45],"that":[46],"previous":[47],"(SED)":[51],"techniques":[52],"have":[53],"overlooked.":[54],"To":[55],"this":[56],"end,":[57],"we":[58,69,90],"introduce":[59,78],"Fine-Grained":[61],"Contrastive":[62],"Learning":[63],"(FGCL)":[64],"framework":[65],"for":[66],"MSED.":[67],"Specifically,":[68],"model":[70],"frame-level":[72],"probabilities":[73],"and":[77,86,102,119,152],"mining":[80],"algorithm":[81],"identify":[83],"both":[84],"easy":[85],"confusing":[87],"frames.":[88],"Then,":[89],"contrast":[94],"loss":[95],"enhance":[97],"distinction":[99],"between":[100],"stuttered":[101,112],"fluent":[103],"speech":[104,154],"frames,":[105],"thereby":[106],"improving":[107],"discriminative":[109],"capability":[110],"feature":[113],"embeddings.":[114],"Extensive":[115],"evaluations":[116],"on":[117,137],"English":[118],"datasets":[121],"demonstrate":[122],"effectiveness":[124],"FGCL,":[126],"achieving":[127],"significant":[129],"increase":[130],"over":[132],"5.0%":[133],"F1":[135],"score":[136],"data<sup":[139],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[140,142],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">1</sup>.<sup":[141],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">1</sup>FGCL":[143],"won":[144],"3rd":[145],"place":[146],"event":[150],"automatic":[153],"recognition":[155],"SLT2024":[157]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
