{"id":"https://openalex.org/W4393973357","doi":"https://doi.org/10.1109/ncc60321.2024.10486028","title":"Exploring the Use of Self-Supervised Representations for Automatic Syllable Stress Detection","display_name":"Exploring the Use of Self-Supervised Representations for Automatic Syllable Stress Detection","publication_year":2024,"publication_date":"2024-02-28","ids":{"openalex":"https://openalex.org/W4393973357","doi":"https://doi.org/10.1109/ncc60321.2024.10486028"},"language":"en","primary_location":{"id":"doi:10.1109/ncc60321.2024.10486028","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ncc60321.2024.10486028","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 National Conference on Communications (NCC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5008931819","display_name":"Jhansi Mallela","orcid":null},"institutions":[{"id":"https://openalex.org/I65181880","display_name":"Indian Institute of Technology Hyderabad","ror":"https://ror.org/01j4v3x97","country_code":"IN","type":"education","lineage":["https://openalex.org/I65181880"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Jhansi Mallela","raw_affiliation_strings":["IIIT Hyderabad,Speech lab, LTRC,India","Speech lab, LTRC, IIIT Hyderabad, India"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IIIT Hyderabad,Speech lab, LTRC,India","institution_ids":["https://openalex.org/I65181880"]},{"raw_affiliation_string":"Speech lab, LTRC, IIIT Hyderabad, India","institution_ids":["https://openalex.org/I65181880"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5092596607","display_name":"Sai Harshitha Aluru","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sai Harshitha Aluru","raw_affiliation_strings":["AI Vidya Jyothi Institute Of Technology Hyderabad,India","AI Vidya Jyothi Institute Of Technology Hyderabad, India"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"AI Vidya Jyothi Institute Of Technology Hyderabad,India","institution_ids":[]},{"raw_affiliation_string":"AI Vidya Jyothi Institute Of Technology Hyderabad, India","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5064062192","display_name":"Chiranjeevi Yarra","orcid":"https://orcid.org/0000-0002-0574-8777"},"institutions":[{"id":"https://openalex.org/I65181880","display_name":"Indian Institute of Technology Hyderabad","ror":"https://ror.org/01j4v3x97","country_code":"IN","type":"education","lineage":["https://openalex.org/I65181880"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Chiranjeevi Yarra","raw_affiliation_strings":["IIIT Hyderabad,Speech lab, LTRC,India","Speech lab, LTRC, IIIT Hyderabad, India"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IIIT Hyderabad,Speech lab, LTRC,India","institution_ids":["https://openalex.org/I65181880"]},{"raw_affiliation_string":"Speech lab, LTRC, IIIT Hyderabad, India","institution_ids":["https://openalex.org/I65181880"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.7448,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.83798697,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.8251000046730042,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.8251000046730042,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6591611504554749},{"id":"https://openalex.org/keywords/stress","display_name":"Stress (linguistics)","score":0.6541196703910828},{"id":"https://openalex.org/keywords/syllable","display_name":"Syllable","score":0.5977514386177063},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.49994349479675293},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4730093777179718},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.46539223194122314},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.32783517241477966},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.11227068305015564}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6591611504554749},{"id":"https://openalex.org/C21036866","wikidata":"https://www.wikidata.org/wiki/Q181767","display_name":"Stress (linguistics)","level":2,"score":0.6541196703910828},{"id":"https://openalex.org/C109089402","wikidata":"https://www.wikidata.org/wiki/Q8188","display_name":"Syllable","level":2,"score":0.5977514386177063},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.49994349479675293},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4730093777179718},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.46539223194122314},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.32783517241477966},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.11227068305015564},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ncc60321.2024.10486028","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ncc60321.2024.10486028","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 National Conference on Communications (NCC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":34,"referenced_works":["https://openalex.org/W135108420","https://openalex.org/W1522301498","https://openalex.org/W1665214252","https://openalex.org/W1973657629","https://openalex.org/W1989149221","https://openalex.org/W1996675345","https://openalex.org/W2038428558","https://openalex.org/W2160891550","https://openalex.org/W2167642311","https://openalex.org/W2221705832","https://openalex.org/W2475802957","https://openalex.org/W2517980096","https://openalex.org/W2608319664","https://openalex.org/W2769135762","https://openalex.org/W2973325419","https://openalex.org/W2985773718","https://openalex.org/W3003326990","https://openalex.org/W3005621653","https://openalex.org/W3036601975","https://openalex.org/W3196525293","https://openalex.org/W3209059054","https://openalex.org/W3211224152","https://openalex.org/W4206566734","https://openalex.org/W4224918181","https://openalex.org/W4238329251","https://openalex.org/W4295936633","https://openalex.org/W4385822660","https://openalex.org/W6631190155","https://openalex.org/W6637242042","https://openalex.org/W6637714784","https://openalex.org/W6700506969","https://openalex.org/W6770509160","https://openalex.org/W6780218876","https://openalex.org/W6803378298"],"related_works":["https://openalex.org/W3211091934","https://openalex.org/W2148825993","https://openalex.org/W1547743883","https://openalex.org/W2514064218","https://openalex.org/W2181821413","https://openalex.org/W2132658536","https://openalex.org/W2094188777","https://openalex.org/W2065604758","https://openalex.org/W3156241633","https://openalex.org/W2112609279"],"abstract_inverted_index":{"The":[0],"task":[1,48],"of":[2,119,161],"automatically":[3],"detecting":[4],"syllable":[5,26,124,210],"stress":[6,27,66,125,155,211],"is":[7,203,235],"a":[8],"key":[9],"module":[10],"in":[11,21,95,108,225],"computer-assisted":[12],"language":[13],"learning":[14,39,42,85,143,163,244],"systems.":[15],"There":[16],"are":[17,44,72,88],"numerous":[18],"studies":[19],"proposed":[20,140],"the":[22,53,65,69,81,90,103,117,129,159,199,206,231,238],"literature":[23],"for":[24,46,76,123,154,209],"automatic":[25],"detection":[28,126,212],"by":[29,146,218,241],"using":[30,49,213],"different":[31,36,167],"knowledge-based":[32,50,59,70,93,133,223],"prosodic":[33],"features.":[34,51,134],"Also,":[35,99],"statistical":[37],"machine":[38],"and":[40,78,127,152,174,195,220,227,253],"deep":[41,176],"models":[43,104],"explored":[45],"this":[47,113],"However,":[52],"acoustic":[54],"parameters":[55],"considered":[56],"to":[57,105],"compute":[58],"features":[60,71,94,224],"might":[61],"not":[62,73],"always":[63,74],"represent":[64],"phenomena,":[67],"hence":[68],"suitable":[75],"generalization":[77],"scalability.":[79],"Recently,":[80],"rapidly":[82],"emerging":[83],"self-supervised":[84,120,214],"based":[86],"representations":[87,101,121,215,239,258],"outperforming":[89],"existing":[91],"state-of-the-art":[92,132,168],"all":[96],"speech":[97],"applications.":[98],"these":[100],"allow":[102],"be":[106],"built":[107],"an":[109],"end-to-end":[110],"fashion.":[111],"In":[112],"work,":[114],"we":[115,136],"explore":[116],"use":[118,137],"(Wav2Vec-2.0),":[122],"compare":[128],"performance":[130,160],"with":[131,165,246,257,261],"Further,":[135],"our":[138],"recently":[139],"explicit":[141,242],"representation":[142,162,243],"framework,":[144],"modeled":[145],"jointly":[147],"optimizing":[148],"variational":[149],"autoencoder":[150],"(VAE)":[151],"DNN":[153],"detection.":[156],"We":[157,180],"analyze":[158],"framework":[164,245],"two":[166,184],"classifiers,":[169],"support":[170],"vector":[171],"machines":[172],"(SVM)":[173],"simple":[175,262],"neural":[177],"network":[178],"(DNN).":[179],"conduct":[181],"experiments":[182],"on":[183],"non-native":[185],"English":[186],"speakers'":[187],"datasets":[188],"from":[189],"ISLE":[190],"corpus":[191],"i.e.,":[192],"German":[193],"(GER),":[194],"Italian":[196],"(ITA).":[197],"From":[198,230],"analysis":[200],"study,":[201],"it":[202,234],"observed":[204,236],"that":[205,237],"classification":[207],"accuracy":[208],"significantly":[216],"improved":[217],"3.2%":[219],"2.7%":[221],"over":[222],"GER":[226],"ITA,":[228],"respectively.":[229],"t-SNE":[232],"plots,":[233],"learned":[240,259],"VAE":[247],"show":[248],"better":[249],"discrimination":[250],"among":[251],"stressed":[252],"unstressed":[254],"syllables":[255],"compared":[256],"implicitly":[260],"DNN.":[263]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":3}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
