{"id":"https://openalex.org/W4393658120","doi":"https://doi.org/10.1109/o-cocosda60357.2023.10482966","title":"IIITH MM2 Speech-Text: A preliminary data for automatic spoken data validation with matched and mismatched speech-text content","display_name":"IIITH MM2 Speech-Text: A preliminary data for automatic spoken data validation with matched and mismatched speech-text content","publication_year":2023,"publication_date":"2023-12-04","ids":{"openalex":"https://openalex.org/W4393658120","doi":"https://doi.org/10.1109/o-cocosda60357.2023.10482966"},"language":"en","primary_location":{"id":"doi:10.1109/o-cocosda60357.2023.10482966","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/o-cocosda60357.2023.10482966","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 26th Conference of the Oriental COCOSDA International Committee for the Co-ordination and Standardisation of Speech Databases and Assessment Techniques (O-COCOSDA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5046482092","display_name":"Nayan Anand","orcid":null},"institutions":[{"id":"https://openalex.org/I65181880","display_name":"Indian Institute of Technology Hyderabad","ror":"https://ror.org/01j4v3x97","country_code":"IN","type":"education","lineage":["https://openalex.org/I65181880"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Nayan Anand","raw_affiliation_strings":["LTRC IIIT Hyderabad,Speech Processing Lab,Hyderabad,India","Speech Processing Lab, LTRC IIIT Hyderabad, Hyderabad, India"],"affiliations":[{"raw_affiliation_string":"LTRC IIIT Hyderabad,Speech Processing Lab,Hyderabad,India","institution_ids":["https://openalex.org/I65181880"]},{"raw_affiliation_string":"Speech Processing Lab, LTRC IIIT Hyderabad, Hyderabad, India","institution_ids":["https://openalex.org/I65181880"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5092189617","display_name":"Meenakshi Sirigiraju","orcid":null},"institutions":[{"id":"https://openalex.org/I65181880","display_name":"Indian Institute of Technology Hyderabad","ror":"https://ror.org/01j4v3x97","country_code":"IN","type":"education","lineage":["https://openalex.org/I65181880"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Meenakshi Sirigiraju","raw_affiliation_strings":["LTRC IIIT Hyderabad,Speech Processing Lab,Hyderabad,India","Speech Processing Lab, LTRC IIIT Hyderabad, Hyderabad, India"],"affiliations":[{"raw_affiliation_string":"LTRC IIIT Hyderabad,Speech Processing Lab,Hyderabad,India","institution_ids":["https://openalex.org/I65181880"]},{"raw_affiliation_string":"Speech Processing Lab, LTRC IIIT Hyderabad, Hyderabad, India","institution_ids":["https://openalex.org/I65181880"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5064062192","display_name":"Chiranjeevi Yarra","orcid":"https://orcid.org/0000-0002-0574-8777"},"institutions":[{"id":"https://openalex.org/I65181880","display_name":"Indian Institute of Technology Hyderabad","ror":"https://ror.org/01j4v3x97","country_code":"IN","type":"education","lineage":["https://openalex.org/I65181880"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Chiranjeevi Yarra","raw_affiliation_strings":["LTRC IIIT Hyderabad,Speech Processing Lab,Hyderabad,India","Speech Processing Lab, LTRC IIIT Hyderabad, Hyderabad, India"],"affiliations":[{"raw_affiliation_string":"LTRC IIIT Hyderabad,Speech Processing Lab,Hyderabad,India","institution_ids":["https://openalex.org/I65181880"]},{"raw_affiliation_string":"Speech Processing Lab, LTRC IIIT Hyderabad, Hyderabad, India","institution_ids":["https://openalex.org/I65181880"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5046482092"],"corresponding_institution_ids":["https://openalex.org/I65181880"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.21699253,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"30","issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9714999794960022,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9714999794960022,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7846832275390625},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6612293720245361},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6297565698623657},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.5658795833587646},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4859277307987213},{"id":"https://openalex.org/keywords/speech-technology","display_name":"Speech technology","score":0.4743558168411255},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.46729615330696106},{"id":"https://openalex.org/keywords/speech-corpus","display_name":"Speech corpus","score":0.45819056034088135},{"id":"https://openalex.org/keywords/content","display_name":"Content (measure theory)","score":0.44225114583969116},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.05498245358467102}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7846832275390625},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6612293720245361},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6297565698623657},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.5658795833587646},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4859277307987213},{"id":"https://openalex.org/C504749915","wikidata":"https://www.wikidata.org/wiki/Q9010971","display_name":"Speech technology","level":3,"score":0.4743558168411255},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.46729615330696106},{"id":"https://openalex.org/C91863865","wikidata":"https://www.wikidata.org/wiki/Q4349497","display_name":"Speech corpus","level":3,"score":0.45819056034088135},{"id":"https://openalex.org/C2778152352","wikidata":"https://www.wikidata.org/wiki/Q5165061","display_name":"Content (measure theory)","level":2,"score":0.44225114583969116},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.05498245358467102},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/o-cocosda60357.2023.10482966","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/o-cocosda60357.2023.10482966","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 26th Conference of the Oriental COCOSDA International Committee for the Co-ordination and Standardisation of Speech Databases and Assessment Techniques (O-COCOSDA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":37,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W1507300385","https://openalex.org/W1584308190","https://openalex.org/W1901616594","https://openalex.org/W1976526581","https://openalex.org/W1999404243","https://openalex.org/W2004902747","https://openalex.org/W2014196286","https://openalex.org/W2045956438","https://openalex.org/W2059652594","https://openalex.org/W2090861223","https://openalex.org/W2105976995","https://openalex.org/W2128160875","https://openalex.org/W2407151108","https://openalex.org/W2625373267","https://openalex.org/W2655561246","https://openalex.org/W2946595616","https://openalex.org/W2962949934","https://openalex.org/W2976128099","https://openalex.org/W2988916019","https://openalex.org/W3014393908","https://openalex.org/W3036601975","https://openalex.org/W3121914243","https://openalex.org/W3127686677","https://openalex.org/W3144571334","https://openalex.org/W3147984406","https://openalex.org/W3160786003","https://openalex.org/W4362515116","https://openalex.org/W4372338196","https://openalex.org/W6631362777","https://openalex.org/W6763088532","https://openalex.org/W6768107564","https://openalex.org/W6780218876","https://openalex.org/W6788328058","https://openalex.org/W6792646505","https://openalex.org/W6851775633","https://openalex.org/W6910546390"],"related_works":["https://openalex.org/W4200068392","https://openalex.org/W2184371793","https://openalex.org/W2537969829","https://openalex.org/W596245619","https://openalex.org/W4388404911","https://openalex.org/W4312742405","https://openalex.org/W2105439218","https://openalex.org/W168921769","https://openalex.org/W2147998355","https://openalex.org/W2014684632"],"abstract_inverted_index":{"The":[0,55,174],"demand":[1,64],"for":[2,91,137,228,259],"high-quality":[3,40,78],"speech":[4,15,20,32,52,113,178],"data":[5,30,41,79,83,93],"has":[6],"been":[7],"increasing":[8],"as":[9,61],"deep-learning":[10],"approaches":[11],"gain":[12],"popularity":[13],"in":[14,153,271],"applications.":[16],"Among":[17],"these,":[18],"automatic":[19,92],"recognition":[21],"(ASR)":[22],"and":[23,33,53,69,107,155,160,170,207,222,236,252,268],"text-to-speech":[24],"(TTS)":[25],"require":[26],"large":[27],"amount":[28],"of":[29,167,232,242,277],"containing":[31,105,158,249],"the":[34,63,67,77,82,122,131,180,183,191,204,208,216,220,223,230,240,272],"corresponding":[35],"text.":[36,54,193],"For":[37,121],"these":[38],"applications,":[39],"is":[42,58,103,143,203,210],"often":[43],"obtained":[44],"through":[45],"manual":[46,56],"validation,":[47,94],"which":[48,138],"ensures":[49],"matching":[50],"between":[51],"validation":[57],"not":[59],"scalable":[60],"per":[62,199],"due":[65],"to":[66,74,76],"cost":[68],"time":[70],"involved.":[71],"In":[72,88],"order":[73],"cater":[75],"demand,":[80],"validating":[81],"automatically":[84],"could":[85],"be":[86],"useful.":[87],"this":[89],"work,":[90],"a":[95,139,164,256,263],"spoken":[96,187],"English":[97],"corpus":[98],"named":[99],"IIITH":[100],"MM2":[101],"Speech-Text":[102],"created,":[104],"matched":[106,154,251],"mismatched":[108,156,175,253],"speech-text":[109,233,260],"pairs":[110],"under":[111],"read":[112],"conditions":[114],"from":[115,130,149,179],"Indian":[116],"speakers":[117,184],"with":[118,163],"different":[119],"nativities.":[120],"creation,":[123],"we":[124],"consider":[125],"100":[126],"unique":[127],"stimuli":[128,146,198],"selected":[129],"TIMIT":[132],"corpus,":[133],"ensuring":[134],"phonetic":[135],"richness,":[136],"joint":[140],"entropy":[141],"maximization":[142],"proposed.":[144],"These":[145],"are":[147,226],"recorded":[148],"50":[150],"speakers,":[151],"resulting":[152],"sets":[157],"5000":[159],"764":[161],"utterances":[162],"total":[165],"duration":[166],"6":[168],"hours":[169],"1":[171],"hour,":[172],"respectively.":[173,238],"set":[176],"contains":[177,196],"instances":[181],"where":[182],"naturally":[185],"made":[186],"errors":[188],"while":[189],"reading":[190],"reference":[192,205,221],"It":[194],"also":[195],"two":[197],"utterance,":[200],"one":[201],"stimulus":[202],"text,":[206],"other":[209],"manually":[211],"annotated":[212,224],"text":[213,225],"that":[214],"reflects":[215],"erroneous":[217],"speech.":[218],"Thus,":[219],"used":[227],"building":[229],"models":[231],"mismatch":[234,261],"detection":[235,273],"correction,":[237],"To":[239],"best":[241],"our":[243],"knowledge,":[244],"no":[245],"such":[246],"corpora":[247],"exist":[248],"both":[250],"speech-text.":[254],"As":[255],"preliminary":[257],"analysis":[258],"detection,":[262],"baseline":[264],"considering":[265],"Wav2Vec-2.0":[266],"representations":[267],"DTW":[269],"results":[270],"F<inf":[274],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[275],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">1</inf>-score":[276],"0.87.":[278]},"counts_by_year":[],"updated_date":"2025-12-21T01:58:51.020947","created_date":"2025-10-10T00:00:00"}
