{"id":"https://openalex.org/W3198544313","doi":"https://doi.org/10.21437/interspeech.2021-1094","title":"Towards Automatic Speech to Sign Language Generation","display_name":"Towards Automatic Speech to Sign Language Generation","publication_year":2021,"publication_date":"2021-08-27","ids":{"openalex":"https://openalex.org/W3198544313","doi":"https://doi.org/10.21437/interspeech.2021-1094","mag":"3198544313"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2021-1094","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2021-1094","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2021","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5026743067","display_name":"Parul Kapoor","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Parul Kapoor","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007660270","display_name":"Rudrabha Mukhopadhyay","orcid":"https://orcid.org/0009-0000-6628-7065"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rudrabha Mukhopadhyay","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081130373","display_name":"Sindhu B Hegde","orcid":"https://orcid.org/0009-0005-2845-5570"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sindhu B. Hegde","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007109424","display_name":"Vinay P. Namboodiri","orcid":"https://orcid.org/0000-0001-5262-9722"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Vinay Namboodiri","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5053112307","display_name":"C. V. Jawahar","orcid":"https://orcid.org/0000-0001-6767-7057"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"C.V. Jawahar","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5026743067"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.6133,"has_fulltext":false,"cited_by_count":25,"citation_normalized_percentile":{"value":0.83164673,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"3700","last_page":"3704"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11398","display_name":"Hand Gesture Recognition Systems","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11398","display_name":"Hand Gesture Recognition Systems","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9930999875068665,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11285","display_name":"Hearing Impairment and Communication","score":0.9911999702453613,"subfield":{"id":"https://openalex.org/subfields/3204","display_name":"Developmental and Educational Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8785617351531982},{"id":"https://openalex.org/keywords/sign-language","display_name":"Sign language","score":0.655661404132843},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6143589615821838},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5489929914474487},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.5355107188224792},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4737318754196167},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.471731573343277},{"id":"https://openalex.org/keywords/discriminator","display_name":"Discriminator","score":0.4634316861629486},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.46199893951416016},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.4449983537197113},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4235495328903198},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.08903557062149048}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8785617351531982},{"id":"https://openalex.org/C522192633","wikidata":"https://www.wikidata.org/wiki/Q34228","display_name":"Sign language","level":2,"score":0.655661404132843},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6143589615821838},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5489929914474487},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.5355107188224792},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4737318754196167},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.471731573343277},{"id":"https://openalex.org/C2779803651","wikidata":"https://www.wikidata.org/wiki/Q5282088","display_name":"Discriminator","level":3,"score":0.4634316861629486},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.46199893951416016},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.4449983537197113},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4235495328903198},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.08903557062149048},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C94915269","wikidata":"https://www.wikidata.org/wiki/Q1834857","display_name":"Detector","level":2,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2021-1094","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2021-1094","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2021","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.6499999761581421,"display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4293202849","https://openalex.org/W1980965563","https://openalex.org/W1489300767","https://openalex.org/W2387995142","https://openalex.org/W4380714744","https://openalex.org/W4319453655","https://openalex.org/W2089959425","https://openalex.org/W2057775761","https://openalex.org/W2964074194","https://openalex.org/W2800597160"],"abstract_inverted_index":{"We":[0,172],"aim":[1],"to":[2,48,128,149,178,196],"solve":[3],"the":[4,19,64,88,104,116,167,180,197],"highly":[5],"challenging":[6],"task":[7,140],"of":[8,66,169,182,185],"generating":[9,30,94],"continuous":[10,151],"sign":[11,45,95,107,152],"language":[12,46,96,108],"videos":[13,32],"solely":[14],"from":[15,33,58,98,132],"speech":[16,43,82,133],"segments":[17],"for":[18,76,93],"first":[20,105],"time.":[21],"Recent":[22],"efforts":[23],"in":[24,155],"this":[25],"space":[26],"have":[27],"focused":[28],"on":[29],"such":[31],"human-annotated":[34],"text":[35,68,113],"transcripts":[36],"without":[37],"considering":[38],"other":[39,164],"modalities.":[40],"However,":[41],"replacing":[42],"with":[44,55,163],"proves":[47],"be":[49],"a":[50,123],"practical":[51],"solution":[52],"while":[53],"communicating":[54],"people":[56],"suffering":[57],"hearing":[59],"loss.":[60],"Therefore,":[61],"we":[62,100,121],"eliminate":[63],"need":[65],"using":[67],"as":[69,137],"input":[70],"and":[71,102,115,141,161],"design":[72],"techniques":[73],"that":[74],"work":[75],"more":[77],"natural,":[78],"continuous,":[79],"freely":[80],"uttered":[81],"covering":[83],"an":[84,138,142,156],"extensive":[85],"vocabulary.":[86],"Since":[87],"current":[89],"datasets":[90],"are":[91],"inadequate":[92],"directly":[97],"speech,":[99],"collect":[101],"release":[103],"Indian":[106],"dataset":[109],"comprising":[110],"speech-level":[111],"annotations,":[112],"transcripts,":[114],"corresponding":[117],"sign-language":[118],"videos.":[119],"Next,":[120],"propose":[122],"multi-tasking":[124],"transformer":[125],"network":[126],"trained":[127],"generate":[129,150],"signer's":[130],"poses":[131],"segments.":[134],"With":[135],"speech-to-text":[136],"auxiliary":[139],"additional":[143,175],"cross-modal":[144],"discriminator,":[145],"our":[146,170,186],"model":[147],"learns":[148],"pose":[153],"sequences":[154],"end-to-end":[157],"manner.":[158],"Extensive":[159],"experiments":[160],"comparisons":[162],"baselines":[165],"demonstrate":[166],"effectiveness":[168],"approach.":[171],"also":[173],"conduct":[174],"ablation":[176],"studies":[177],"analyze":[179],"effect":[181],"different":[183],"modules":[184],"network.":[187],"A":[188],"demo":[189],"video":[190],"containing":[191],"several":[192],"results":[193],"is":[194],"attached":[195],"supplementary":[198],"material.":[199]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":10},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":1}],"updated_date":"2026-04-25T08:17:42.794288","created_date":"2025-10-10T00:00:00"}
