{"id":"https://openalex.org/W7130374157","doi":"https://doi.org/10.1109/o-cocosda68185.2025.11385116","title":"A Deep Learning Approach to Low-Resource Sanskrit Speech Recognition Using Ctc Loss","display_name":"A Deep Learning Approach to Low-Resource Sanskrit Speech Recognition Using Ctc Loss","publication_year":2025,"publication_date":"2025-11-12","ids":{"openalex":"https://openalex.org/W7130374157","doi":"https://doi.org/10.1109/o-cocosda68185.2025.11385116"},"language":null,"primary_location":{"id":"doi:10.1109/o-cocosda68185.2025.11385116","is_oa":false,"landing_page_url":"https://doi.org/10.1109/o-cocosda68185.2025.11385116","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 28th Conference of the Oriental COCOSDA International Committee for the Co-ordination and Standardisation of Speech Databases and Assessment Techniques (O-COCOSDA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5126292654","display_name":"Suhani Singh","orcid":null},"institutions":[{"id":"https://openalex.org/I4210143260","display_name":"Indira Gandhi Delhi Technical University for Women","ror":"https://ror.org/057c5p638","country_code":"IN","type":"education","lineage":["https://openalex.org/I4210143260"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Suhani Singh","raw_affiliation_strings":["Department of Information Technology, Indira Gandhi Delhi Technical University for Women,India"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Information Technology, Indira Gandhi Delhi Technical University for Women,India","institution_ids":["https://openalex.org/I4210143260"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022991626","display_name":"Amita Dev","orcid":"https://orcid.org/0000-0002-6926-9433"},"institutions":[{"id":"https://openalex.org/I4210143260","display_name":"Indira Gandhi Delhi Technical University for Women","ror":"https://ror.org/057c5p638","country_code":"IN","type":"education","lineage":["https://openalex.org/I4210143260"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Amita Dev","raw_affiliation_strings":["Department of Information Technology, Indira Gandhi Delhi Technical University for Women,India"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Information Technology, Indira Gandhi Delhi Technical University for Women,India","institution_ids":["https://openalex.org/I4210143260"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5126303692","display_name":"Poonam Bansal","orcid":null},"institutions":[{"id":"https://openalex.org/I4210143260","display_name":"Indira Gandhi Delhi Technical University for Women","ror":"https://ror.org/057c5p638","country_code":"IN","type":"education","lineage":["https://openalex.org/I4210143260"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Poonam Bansal","raw_affiliation_strings":["Department of Artificial Intelligence and Data Science, Indira Gandhi Delhi Technical University for Women,India"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Artificial Intelligence and Data Science, Indira Gandhi Delhi Technical University for Women,India","institution_ids":["https://openalex.org/I4210143260"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.81618154,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9444000124931335,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9444000124931335,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.01899999938905239,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.004100000020116568,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/sanskrit","display_name":"Sanskrit","score":0.9247999787330627},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.599399983882904},{"id":"https://openalex.org/keywords/rendering","display_name":"Rendering (computer graphics)","score":0.5437999963760376},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.4602999985218048},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.45829999446868896},{"id":"https://openalex.org/keywords/language-understanding","display_name":"Language understanding","score":0.44190001487731934}],"concepts":[{"id":"https://openalex.org/C29912816","wikidata":"https://www.wikidata.org/wiki/Q11059","display_name":"Sanskrit","level":2,"score":0.9247999787330627},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7117999792098999},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.599399983882904},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5898000001907349},{"id":"https://openalex.org/C205711294","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Rendering (computer graphics)","level":2,"score":0.5437999963760376},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5234000086784363},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.4602999985218048},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.45829999446868896},{"id":"https://openalex.org/C2983448237","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Language understanding","level":2,"score":0.44190001487731934},{"id":"https://openalex.org/C8521452","wikidata":"https://www.wikidata.org/wiki/Q203790","display_name":"Connectionism","level":3,"score":0.35510000586509705},{"id":"https://openalex.org/C74672266","wikidata":"https://www.wikidata.org/wiki/Q815859","display_name":"Language acquisition","level":2,"score":0.3425000011920929},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3109000027179718},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.29589998722076416},{"id":"https://openalex.org/C94922259","wikidata":"https://www.wikidata.org/wiki/Q33215","display_name":"Constructed language","level":2,"score":0.2847000062465668},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.26919999718666077},{"id":"https://openalex.org/C121766906","wikidata":"https://www.wikidata.org/wiki/Q1053936","display_name":"Sanskrit literature","level":3,"score":0.25540000200271606},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.2540000081062317}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/o-cocosda68185.2025.11385116","is_oa":false,"landing_page_url":"https://doi.org/10.1109/o-cocosda68185.2025.11385116","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 28th Conference of the Oriental COCOSDA International Committee for the Co-ordination and Standardisation of Speech Databases and Assessment Techniques (O-COCOSDA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.8530257344245911,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":7,"referenced_works":["https://openalex.org/W1977286842","https://openalex.org/W2063223471","https://openalex.org/W3006119204","https://openalex.org/W3173649224","https://openalex.org/W4229456977","https://openalex.org/W4362716491","https://openalex.org/W4385233554"],"related_works":[],"abstract_inverted_index":{"Sanskrit":[0,26,79,112],"is":[1,80,128],"emerging":[2],"as":[3,192],"endangered":[4],"language":[5,42,65],"because":[6],"of":[7,158,187],"having":[8,85],"only":[9],"2400":[10],"speakers":[11],"left":[12],"in":[13,46],"all":[14,96],"over":[15],"world":[16],"according":[17],"to":[18,132],"census.":[19],"Many":[20],"Indian":[21,83],"languages":[22,172],"have":[23,33,115],"evolved":[24],"from":[25,107],"Language":[27,84],"and":[28,49,94,120,130,149,169],"many":[29],"granthas,":[30],"shlokas,":[31],"mantras":[32],"been":[34,116],"documented":[35],"using":[36,141],"this":[37,41],"language.":[38],"ASR":[39,58,101,143,160],"for":[40,60,100,118,167,176],"will":[43,50],"help":[44],"society":[45],"several":[47],"ways":[48],"protect":[51],"heritage.":[52],"The":[53,103,136,153],"paper":[54],"represents":[55],"an":[56,62],"end-to-end":[57],"system":[59],"Sanskrit,":[61,193],"under-":[63],"resourced":[64],"leveraging":[66],"Deep":[67],"Learning":[68],"architecture":[69,166],"trained":[70,137],"with":[71],"the":[72,122,156,174,184],"Connectionist":[73],"Temporal":[74],"Classification":[75],"(CTC)":[76],"loss":[77],"function.":[78],"a":[81],"classical":[82,189],"rich":[86],"morphological":[87],"structure,":[88],"intrinsic":[89],"grammar,":[90],"limited":[91],"digital":[92],"resources":[93],"these":[95],"poses":[97],"considerable":[98],"challenges":[99],"Task.":[102],"corpus":[104],"V\u0101ksa\u00f1caya\u1e25":[105],"collected":[106],"IIT":[108],"Bombay":[109],"consisting":[110],"annotated":[111],"audio-text":[113],"pairs":[114],"utilized":[117],"training":[119],"testing":[121],"model.":[123],"For":[124],"model":[125],"inputs,":[126],"it":[127],"curated":[129],"preprocessed":[131],"generate":[133],"spectograms":[134],"features.":[135],"System":[138],"was":[139],"evaluated":[140],"standard":[142],"metrics,":[144],"achieving":[145],"41":[146],"%":[147,151],"WER":[148],"22":[150],"CER.":[152],"findings":[154],"illustrate":[155],"viability":[157],"utilizing":[159],"frameworks":[161],"based":[162],"on":[163],"deep":[164],"learning":[165],"low-resource":[168],"morphologically":[170],"intricate":[171],"without":[173],"necessity":[175],"manually":[177],"crafted":[178],"phoneme-level":[179],"segmentation.":[180],"This":[181],"study":[182],"advances":[183],"overarching":[185],"objective":[186],"rendering":[188],"languages,":[190],"such":[191],"accessible":[194],"through":[195],"contemporary":[196],"speech":[197],"technologies.":[198]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-02-19T00:00:00"}
