{"id":"https://openalex.org/W4410770331","doi":"https://doi.org/10.1109/icasspw65056.2025.11011256","title":"Closing the Loop on Speech to Music Translation: Automatically Generating Synthetic Percussive Sequences on the Mridangam from Konnakol","display_name":"Closing the Loop on Speech to Music Translation: Automatically Generating Synthetic Percussive Sequences on the Mridangam from Konnakol","publication_year":2025,"publication_date":"2025-04-06","ids":{"openalex":"https://openalex.org/W4410770331","doi":"https://doi.org/10.1109/icasspw65056.2025.11011256"},"language":"en","primary_location":{"id":"doi:10.1109/icasspw65056.2025.11011256","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icasspw65056.2025.11011256","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Acoustics, Speech, and Signal Processing Workshops (ICASSPW)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5111288428","display_name":"Gopika Krishnan","orcid":null},"institutions":[{"id":"https://openalex.org/I170486558","display_name":"Universitat Pompeu Fabra","ror":"https://ror.org/04n0g0b29","country_code":"ES","type":"education","lineage":["https://openalex.org/I170486558"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Gopika Krishnan","raw_affiliation_strings":["Universitat Pompeu Fabra,Department of Engineering,Barcelona,Spain"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Universitat Pompeu Fabra,Department of Engineering,Barcelona,Spain","institution_ids":["https://openalex.org/I170486558"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Julia Drabek","orcid":null},"institutions":[{"id":"https://openalex.org/I145311948","display_name":"Johns Hopkins University","ror":"https://ror.org/00za53h95","country_code":"US","type":"education","lineage":["https://openalex.org/I145311948"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Julia Drabek","raw_affiliation_strings":["Johns Hopkins University,Department of Electrical and Computer Engineering,Baltimore,Maryland,United States"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Johns Hopkins University,Department of Electrical and Computer Engineering,Baltimore,Maryland,United States","institution_ids":["https://openalex.org/I145311948"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039139241","display_name":"Akshay Anantapadmanabhan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Akshay Anantapadmanabhan","raw_affiliation_strings":["Freelance Musician,India"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Freelance Musician,India","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037231235","display_name":"Kaustuv Kanti Ganguli","orcid":"https://orcid.org/0000-0003-2592-8289"},"institutions":[{"id":"https://openalex.org/I91044093","display_name":"Zayed University","ror":"https://ror.org/03snqfa66","country_code":"AE","type":"education","lineage":["https://openalex.org/I91044093"]}],"countries":["AE"],"is_corresponding":false,"raw_author_name":"Kaustuv Kanti Ganguli","raw_affiliation_strings":["Zayed University,Department of Computational Systems,Abu Dhabi,United Arab Emirates"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Zayed University,Department of Computational Systems,Abu Dhabi,United Arab Emirates","institution_ids":["https://openalex.org/I91044093"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5044583275","display_name":"Carlos Guedes","orcid":"https://orcid.org/0000-0002-1898-2183"},"institutions":[{"id":"https://openalex.org/I120250893","display_name":"New York University Abu Dhabi","ror":"https://ror.org/00e5k0821","country_code":"AE","type":"education","lineage":["https://openalex.org/I120250893","https://openalex.org/I57206974"]}],"countries":["AE"],"is_corresponding":false,"raw_author_name":"Carlos Guedes","raw_affiliation_strings":["New York University Abu Dhabi,Music and Sound Cultures Research Group,Abu Dhabi,United Arab Emirates"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"New York University Abu Dhabi,Music and Sound Cultures Research Group,Abu Dhabi,United Arab Emirates","institution_ids":["https://openalex.org/I120250893"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.4954,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.81781965,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10403","display_name":"Phonetics and Phonology Research","score":0.9279000163078308,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10403","display_name":"Phonetics and Phonology Research","score":0.9279000163078308,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T13996","display_name":"Diverse Musicological Studies","score":0.9182000160217285,"subfield":{"id":"https://openalex.org/subfields/1210","display_name":"Music"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9118000268936157,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closing","display_name":"Closing (real estate)","score":0.8004052639007568},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6691486835479736},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6640117764472961},{"id":"https://openalex.org/keywords/loop","display_name":"Loop (graph theory)","score":0.65785813331604},{"id":"https://openalex.org/keywords/translation","display_name":"Translation (biology)","score":0.5477473735809326},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4661470949649811},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.4404495358467102},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.42986756563186646},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.09666052460670471}],"concepts":[{"id":"https://openalex.org/C2778775528","wikidata":"https://www.wikidata.org/wiki/Q5135432","display_name":"Closing (real estate)","level":2,"score":0.8004052639007568},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6691486835479736},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6640117764472961},{"id":"https://openalex.org/C184670325","wikidata":"https://www.wikidata.org/wiki/Q512604","display_name":"Loop (graph theory)","level":2,"score":0.65785813331604},{"id":"https://openalex.org/C149364088","wikidata":"https://www.wikidata.org/wiki/Q185917","display_name":"Translation (biology)","level":4,"score":0.5477473735809326},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4661470949649811},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.4404495358467102},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.42986756563186646},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.09666052460670471},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C105580179","wikidata":"https://www.wikidata.org/wiki/Q188928","display_name":"Messenger RNA","level":3,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icasspw65056.2025.11011256","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icasspw65056.2025.11011256","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Acoustics, Speech, and Signal Processing Workshops (ICASSPW)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":15,"referenced_works":["https://openalex.org/W1525757074","https://openalex.org/W1972253807","https://openalex.org/W2916979304","https://openalex.org/W2962824709","https://openalex.org/W3134950117","https://openalex.org/W4410772151","https://openalex.org/W6604734077","https://openalex.org/W6631362777","https://openalex.org/W6768298999","https://openalex.org/W6771843545","https://openalex.org/W6773103014","https://openalex.org/W6780218876","https://openalex.org/W6789649506","https://openalex.org/W6847322037","https://openalex.org/W6847363464"],"related_works":["https://openalex.org/W2116422677","https://openalex.org/W2517007886","https://openalex.org/W2383994331","https://openalex.org/W1522019333","https://openalex.org/W278047738","https://openalex.org/W2076369646","https://openalex.org/W2373884197","https://openalex.org/W4244817184","https://openalex.org/W2961923709","https://openalex.org/W2516969888"],"abstract_inverted_index":{"This":[0,119],"paper":[1],"presents":[2],"a":[3,10,55,64,81],"pipeline":[4,120],"to":[5,98,153],"convert":[6],"spoken":[7,37],"Konnakol":[8,31,62,96,127],"sequences,":[9,38],"South":[11],"Indian":[12],"vocal":[13],"percussion":[14],"language,":[15],"into":[16,71],"synthetic":[17],"rhythmic":[18,87],"sequences":[19,70,88],"performed":[20],"on":[21,30,149],"the":[22,26,40,61,95,124,151],"mridangam.":[23],"We":[24],"fine-tune":[25],"Whisper":[27],"speech-to-text":[28],"model":[29],"data,":[32],"enabling":[33],"accurate":[34],"transcription":[35,125],"of":[36,43,126],"despite":[39],"small":[41],"size":[42],"our":[44],"dataset":[45],"(approximately":[46],"15":[47],"minutes).":[48],"The":[49],"transcriptions":[50,76],"are":[51],"rhythmically":[52],"encoded":[53],"in":[54],"format":[56],"that":[57,67,89],"is":[58,104],"compatible":[59],"with":[60,106],"Typewriter,":[63],"web":[65],"application":[66],"converts":[68],"these":[69,75],"mridangam":[72,100],"audio.":[73,101],"Additionally,":[74],"serve":[77],"as":[78],"input":[79],"for":[80,116,132,142],"Markov":[82],"model,":[83],"which":[84],"generates":[85],"new":[86],"can":[90],"also":[91,129],"be":[92],"processed":[93],"through":[94],"Typewriter":[97],"produce":[99],"Whisper\u2019s":[102],"performance":[103],"impressive":[105],"very":[107],"low":[108],"error":[109],"rates,":[110],"making":[111],"it":[112],"an":[113],"ideal":[114],"tool":[115],"this":[117],"task.":[118],"not":[121],"only":[122],"facilitates":[123],"but":[128],"opens":[130],"possibilities":[131],"creating":[133],"educational":[134],"tools,":[135],"preserving":[136],"cultural":[137],"heritage,":[138],"and":[139,156],"generating":[140],"data":[141],"rhythm-based":[143],"applications.":[144],"Future":[145],"work":[146],"will":[147],"focus":[148],"refining":[150],"process":[152],"improve":[154],"accuracy":[155],"versatility.":[157]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
