{"id":"https://openalex.org/W4410341396","doi":"https://doi.org/10.1109/ncc63735.2025.10982852","title":"A Preliminary Analysis of Automatic Word and Syllable Prominence Detection in Non-Native Speech with Text-to-Speech Prosody Embeddings","display_name":"A Preliminary Analysis of Automatic Word and Syllable Prominence Detection in Non-Native Speech with Text-to-Speech Prosody Embeddings","publication_year":2025,"publication_date":"2025-03-06","ids":{"openalex":"https://openalex.org/W4410341396","doi":"https://doi.org/10.1109/ncc63735.2025.10982852"},"language":"en","primary_location":{"id":"doi:10.1109/ncc63735.2025.10982852","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ncc63735.2025.10982852","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 National Conference on Communications (NCC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102013926","display_name":"Anindita Mondal","orcid":"https://orcid.org/0000-0002-3247-357X"},"institutions":[{"id":"https://openalex.org/I65181880","display_name":"Indian Institute of Technology Hyderabad","ror":"https://ror.org/01j4v3x97","country_code":"IN","type":"education","lineage":["https://openalex.org/I65181880"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Anindita Mondal","raw_affiliation_strings":["IIIT Hyderabad,Language Technologies Research Center,India"],"affiliations":[{"raw_affiliation_string":"IIIT Hyderabad,Language Technologies Research Center,India","institution_ids":["https://openalex.org/I65181880"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115405383","display_name":"Rangavajjala Sankara Bharadwaj","orcid":null},"institutions":[{"id":"https://openalex.org/I65181880","display_name":"Indian Institute of Technology Hyderabad","ror":"https://ror.org/01j4v3x97","country_code":"IN","type":"education","lineage":["https://openalex.org/I65181880"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Rangavajjala Sankara Bharadwaj","raw_affiliation_strings":["IIIT Hyderabad,Language Technologies Research Center,India"],"affiliations":[{"raw_affiliation_string":"IIIT Hyderabad,Language Technologies Research Center,India","institution_ids":["https://openalex.org/I65181880"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008931819","display_name":"Jhansi Mallela","orcid":null},"institutions":[{"id":"https://openalex.org/I65181880","display_name":"Indian Institute of Technology Hyderabad","ror":"https://ror.org/01j4v3x97","country_code":"IN","type":"education","lineage":["https://openalex.org/I65181880"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Jhansi Mallela","raw_affiliation_strings":["IIIT Hyderabad,Language Technologies Research Center,India"],"affiliations":[{"raw_affiliation_string":"IIIT Hyderabad,Language Technologies Research Center,India","institution_ids":["https://openalex.org/I65181880"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007569113","display_name":"Anil Kumar Vuppala","orcid":"https://orcid.org/0000-0001-7795-0408"},"institutions":[{"id":"https://openalex.org/I65181880","display_name":"Indian Institute of Technology Hyderabad","ror":"https://ror.org/01j4v3x97","country_code":"IN","type":"education","lineage":["https://openalex.org/I65181880"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Anil Kumar Vuppala","raw_affiliation_strings":["IIIT Hyderabad,Language Technologies Research Center,India"],"affiliations":[{"raw_affiliation_string":"IIIT Hyderabad,Language Technologies Research Center,India","institution_ids":["https://openalex.org/I65181880"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5064062192","display_name":"Chiranjeevi Yarra","orcid":"https://orcid.org/0000-0002-0574-8777"},"institutions":[{"id":"https://openalex.org/I65181880","display_name":"Indian Institute of Technology Hyderabad","ror":"https://ror.org/01j4v3x97","country_code":"IN","type":"education","lineage":["https://openalex.org/I65181880"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Chiranjeevi Yarra","raw_affiliation_strings":["IIIT Hyderabad,Language Technologies Research Center,India"],"affiliations":[{"raw_affiliation_string":"IIIT Hyderabad,Language Technologies Research Center,India","institution_ids":["https://openalex.org/I65181880"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5102013926"],"corresponding_institution_ids":["https://openalex.org/I65181880"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.05204959,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9799000024795532,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9799000024795532,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10403","display_name":"Phonetics and Phonology Research","score":0.9455000162124634,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9434999823570251,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/prosody","display_name":"Prosody","score":0.8243708610534668},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7602630853652954},{"id":"https://openalex.org/keywords/syllable","display_name":"Syllable","score":0.7099580764770508},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6795905828475952},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.5823588371276855},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5028616786003113},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.482318639755249},{"id":"https://openalex.org/keywords/speech-corpus","display_name":"Speech corpus","score":0.4193200469017029},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.3521278202533722},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.26273173093795776}],"concepts":[{"id":"https://openalex.org/C542774811","wikidata":"https://www.wikidata.org/wiki/Q10880526","display_name":"Prosody","level":2,"score":0.8243708610534668},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7602630853652954},{"id":"https://openalex.org/C109089402","wikidata":"https://www.wikidata.org/wiki/Q8188","display_name":"Syllable","level":2,"score":0.7099580764770508},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6795905828475952},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.5823588371276855},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5028616786003113},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.482318639755249},{"id":"https://openalex.org/C91863865","wikidata":"https://www.wikidata.org/wiki/Q4349497","display_name":"Speech corpus","level":3,"score":0.4193200469017029},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.3521278202533722},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.26273173093795776},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ncc63735.2025.10982852","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ncc63735.2025.10982852","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 National Conference on Communications (NCC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W2101601067","https://openalex.org/W2113241166","https://openalex.org/W2167642311","https://openalex.org/W2398044918","https://openalex.org/W2608319664","https://openalex.org/W2765175868","https://openalex.org/W2905749056","https://openalex.org/W2963609956","https://openalex.org/W2964243274","https://openalex.org/W2973325419","https://openalex.org/W3021213627","https://openalex.org/W4230062437","https://openalex.org/W4388878660","https://openalex.org/W4393973357","https://openalex.org/W6610566761","https://openalex.org/W6631003133","https://openalex.org/W6763832098","https://openalex.org/W6777694618","https://openalex.org/W6778823374","https://openalex.org/W6780218876","https://openalex.org/W6796464841","https://openalex.org/W7051469422"],"related_works":["https://openalex.org/W2396586136","https://openalex.org/W1505084104","https://openalex.org/W2599935372","https://openalex.org/W1927421023","https://openalex.org/W10581632","https://openalex.org/W3149582125","https://openalex.org/W157238252","https://openalex.org/W2169632867","https://openalex.org/W2465421051","https://openalex.org/W152045069"],"abstract_inverted_index":{"Automatic":[0],"detection":[1,60,170],"of":[2,53],"prominence":[3,38,59,153,169],"at":[4],"the":[5,26,40,51,70,79,109,112,118,124,132,173,191],"word":[6,166],"and":[7,36,75,84,106,145,183,194],"syllable-levels":[8],"is":[9,67],"critical":[10],"for":[11,58,123,158],"building":[12],"computer-assisted":[13],"language":[14],"learning":[15],"systems.":[16],"It":[17],"has":[18],"been":[19],"shown":[20],"that":[21],"prosody":[22,54],"embeddings":[23,55,71,93,113,175],"learned":[24],"by":[25],"current":[27],"state-of-the-art":[28],"(SOTA)":[29],"text-to-speech":[30],"(TTS)":[31],"systems":[32],"could":[33],"generate":[34],"word-":[35],"syllable-level":[37,168],"in":[39,46],"synthesized":[41],"speech":[42,77,105,142,147],"as":[43,45],"natural":[44],"native":[47,74,141],"speech.":[48],"To":[49],"understand":[50],"effectiveness":[52],"from":[56,73,86,117,131],"TTS":[57,89,119,133,174],"under":[61,96,134],"nonnative":[62],"context,":[63],"a":[64,87],"comparative":[65],"analysis":[66],"conducted":[68,139],"on":[69,140,165],"extracted":[72,95,115],"non-native":[76,146],"considering":[78],"prominence-related":[80],"embeddings:":[81],"duration,":[82],"energy,":[83],"pitch":[85],"SOTA":[88],"named":[90],"FastSpeech2.":[91],"These":[92],"are":[94,114,138,155,176],"two":[97],"conditions":[98],"considering:":[99],"1)":[100],"only":[101],"text,":[102],"2)":[103],"both":[104,159],"text.":[107],"For":[108,150],"first":[110],"condition,":[111,126],"directly":[116],"inference":[120],"mode,":[121],"whereas":[122],"second":[125],"we":[127],"propose":[128],"to":[129,178,188],"extract":[130],"training":[135],"mode.":[136],"Experiments":[137],"corpus:":[143,148],"Tatoeba,":[144],"ISLE.":[149],"experimentation,":[151],"word-level":[152],"locations":[154],"manually":[156],"annotated":[157],"corpora.":[160],"The":[161],"highest":[162],"relative":[163],"improvement":[164],"&":[167,181,185],"accuracies":[171],"with":[172,190],"found":[177],"be":[179],"13.7%":[180],"5.9%":[182],"16.2%":[184],"6.9%":[186],"compared":[187],"those":[189],"heuristic-based":[192],"features":[193],"self-supervised":[195],"Wav2Vec-2.0":[196],"representations,":[197],"respectively.":[198]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
