{"id":"https://openalex.org/W7125964632","doi":"https://doi.org/10.1109/smc58881.2025.11342781","title":"StyU-STD: Style-Diverse Sample Generation from Unlabeled Data for Query-by-Example Spoken Term Detection","display_name":"StyU-STD: Style-Diverse Sample Generation from Unlabeled Data for Query-by-Example Spoken Term Detection","publication_year":2025,"publication_date":"2025-10-05","ids":{"openalex":"https://openalex.org/W7125964632","doi":"https://doi.org/10.1109/smc58881.2025.11342781"},"language":null,"primary_location":{"id":"doi:10.1109/smc58881.2025.11342781","is_oa":false,"landing_page_url":"https://doi.org/10.1109/smc58881.2025.11342781","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Systems, Man, and Cybernetics (SMC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5124115925","display_name":"Hanyu Ding","orcid":null},"institutions":[{"id":"https://openalex.org/I115592961","display_name":"Jiangsu University","ror":"https://ror.org/03jc41j30","country_code":"CN","type":"education","lineage":["https://openalex.org/I115592961"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Hanyu Ding","raw_affiliation_strings":["Jiangsu University,School of Computer Science and Communication Engineering,Zhenjiang,China"],"affiliations":[{"raw_affiliation_string":"Jiangsu University,School of Computer Science and Communication Engineering,Zhenjiang,China","institution_ids":["https://openalex.org/I115592961"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008699574","display_name":"Lijian Gao","orcid":"https://orcid.org/0000-0002-6458-0660"},"institutions":[{"id":"https://openalex.org/I115592961","display_name":"Jiangsu University","ror":"https://ror.org/03jc41j30","country_code":"CN","type":"education","lineage":["https://openalex.org/I115592961"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lijian Gao","raw_affiliation_strings":["Jiangsu University,School of Computer Science and Communication Engineering,Zhenjiang,China"],"affiliations":[{"raw_affiliation_string":"Jiangsu University,School of Computer Science and Communication Engineering,Zhenjiang,China","institution_ids":["https://openalex.org/I115592961"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010729889","display_name":"Wenlong Dong","orcid":"https://orcid.org/0000-0003-3914-1081"},"institutions":[{"id":"https://openalex.org/I115592961","display_name":"Jiangsu University","ror":"https://ror.org/03jc41j30","country_code":"CN","type":"education","lineage":["https://openalex.org/I115592961"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenlong Dong","raw_affiliation_strings":["Jiangsu University,School of Computer Science and Communication Engineering,Zhenjiang,China"],"affiliations":[{"raw_affiliation_string":"Jiangsu University,School of Computer Science and Communication Engineering,Zhenjiang,China","institution_ids":["https://openalex.org/I115592961"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057749373","display_name":"Xiaodong Li","orcid":"https://orcid.org/0000-0003-2348-926X"},"institutions":[{"id":"https://openalex.org/I115592961","display_name":"Jiangsu University","ror":"https://ror.org/03jc41j30","country_code":"CN","type":"education","lineage":["https://openalex.org/I115592961"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiangrui Li","raw_affiliation_strings":["Jiangsu University,School of Computer Science and Communication Engineering,Zhenjiang,China"],"affiliations":[{"raw_affiliation_string":"Jiangsu University,School of Computer Science and Communication Engineering,Zhenjiang,China","institution_ids":["https://openalex.org/I115592961"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5039208882","display_name":"Qirong Mao","orcid":null},"institutions":[{"id":"https://openalex.org/I115592961","display_name":"Jiangsu University","ror":"https://ror.org/03jc41j30","country_code":"CN","type":"education","lineage":["https://openalex.org/I115592961"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qirong Mao","raw_affiliation_strings":["Jiangsu University,School of Computer Science and Communication Engineering,Zhenjiang,China"],"affiliations":[{"raw_affiliation_string":"Jiangsu University,School of Computer Science and Communication Engineering,Zhenjiang,China","institution_ids":["https://openalex.org/I115592961"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5124115925"],"corresponding_institution_ids":["https://openalex.org/I115592961"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.83966667,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1709","last_page":"1715"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.6599000096321106,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.6599000096321106,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.12250000238418579,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.03689999878406525,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/term","display_name":"Term (time)","score":0.6848999857902527},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.5489000082015991},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.48089998960494995},{"id":"https://openalex.org/keywords/labeled-data","display_name":"Labeled data","score":0.4383000135421753},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.42719998955726624},{"id":"https://openalex.org/keywords/voice-activity-detection","display_name":"Voice activity detection","score":0.33719998598098755},{"id":"https://openalex.org/keywords/measure","display_name":"Measure (data warehouse)","score":0.33009999990463257}],"concepts":[{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.6848999857902527},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6700000166893005},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5985999703407288},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.5489000082015991},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.49570000171661377},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.48089998960494995},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.4383000135421753},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.42719998955726624},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.37869998812675476},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.33719998598098755},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.33009999990463257},{"id":"https://openalex.org/C2776445246","wikidata":"https://www.wikidata.org/wiki/Q1792644","display_name":"Style (visual arts)","level":2,"score":0.3174999952316284},{"id":"https://openalex.org/C2164484","wikidata":"https://www.wikidata.org/wiki/Q5170150","display_name":"Core (optical fiber)","level":2,"score":0.2919999957084656},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.2888000011444092},{"id":"https://openalex.org/C32022120","wikidata":"https://www.wikidata.org/wiki/Q797225","display_name":"Interference (communication)","level":3,"score":0.28679999709129333},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.2782000005245209},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.26980000734329224}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/smc58881.2025.11342781","is_oa":false,"landing_page_url":"https://doi.org/10.1109/smc58881.2025.11342781","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Systems, Man, and Cybernetics (SMC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.46017301082611084,"display_name":"Quality Education"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321410","display_name":"Jiangsu University","ror":"https://ror.org/03jc41j30"},{"id":"https://openalex.org/F4320330944","display_name":"Nature","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W1567520911","https://openalex.org/W2114347655","https://openalex.org/W2603777577","https://openalex.org/W2895103375","https://openalex.org/W2963571336","https://openalex.org/W2964169922","https://openalex.org/W2972659941","https://openalex.org/W3019546258","https://openalex.org/W3045214847","https://openalex.org/W3146777637","https://openalex.org/W3151510603","https://openalex.org/W3160452051","https://openalex.org/W3200887081","https://openalex.org/W3209984917","https://openalex.org/W3216296943","https://openalex.org/W4206567542","https://openalex.org/W4281492411","https://openalex.org/W4295308567","https://openalex.org/W4372260053","https://openalex.org/W4384407239","https://openalex.org/W4385807530","https://openalex.org/W4385822689","https://openalex.org/W4392932011","https://openalex.org/W4402112189","https://openalex.org/W4406611587","https://openalex.org/W4406611965","https://openalex.org/W4406612793"],"related_works":[],"abstract_inverted_index":{"In":[0,129],"recent":[1],"years,":[2],"query-by-example":[3,75],"spoken":[4],"term":[5],"detection":[6,14],"(QbE-STD)":[7],"techniques":[8],"have":[9],"made":[10],"significant":[11],"progress":[12],"in":[13,170],"accuracy":[15,196],"and":[16,142,172,197],"speed.":[17],"However,":[18],"this":[19,59],"task":[20],"also":[21],"encounters":[22],"situations":[23],"where":[24],"labeled":[25,119],"data":[26,35,73,117],"is":[27,83],"scarce":[28],"or":[29],"even":[30],"nonexistent,":[31],"with":[32,87],"only":[33],"unlabeled":[34],"available.":[36],"Although":[37],"some":[38],"solutions":[39],"exist,":[40],"they":[41],"still":[42],"struggle":[43],"to":[44,54,84,104,126,135,157],"effectively":[45,173],"handle":[46],"highly":[47],"variable":[48],"speech,":[49],"especially":[50],"when":[51],"it":[52],"comes":[53],"differing":[55],"styles.":[56],"To":[57],"address":[58],"issue,":[60],"we":[61,97],"propose":[62],"a":[63],"self-supervised":[64],"learning":[65],"method":[66,190],"named":[67],"Style-diverse":[68],"sample":[69,153],"generation":[70],"from":[71,101,114],"Unlabeled":[72],"for":[74,94],"Spoken":[76],"Term":[77],"Detection":[78],"(StyU-STD).":[79],"The":[80,181],"core":[81],"idea":[82],"generate":[85],"samples":[86,122,144],"the":[88,102,124,137,151,159,175],"same":[89],"content":[90],"but":[91],"different":[92],"styles":[93],"learning.":[95],"Specifically,":[96],"randomly":[98,112],"extract":[99],"segments":[100,111],"speech":[103,116,125,171],"be":[105,127],"tested":[106],"as":[107,120],"positive":[108,141],"samples,":[109],"while":[110,145],"extracted":[113],"other":[115],"are":[118,133,155],"negative":[121,143],"of":[123,139],"tested.":[128],"addition,":[130],"various":[131],"transformations":[132],"applied":[134],"alter":[136],"style":[138,179],"both":[140],"preserving":[146],"their":[147],"original":[148],"content.":[149],"Then,":[150],"generated":[152],"pairs":[154],"used":[156],"train":[158],"Style":[160],"Suppressed":[161],"Convolutional":[162],"Network,":[163],"which":[164],"focuses":[165],"more":[166],"on":[167],"content-related":[168],"information":[169],"reduces":[174],"interference":[176],"caused":[177],"by":[178],"differences.":[180],"experimental":[182],"results":[183],"show":[184],"that,":[185],"across":[186],"multiple":[187],"datasets,":[188],"our":[189],"outperforms":[191],"existing":[192],"methods,":[193],"achieving":[194],"higher":[195],"robustness.":[198]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2026-01-29T00:00:00"}
