{"id":"https://openalex.org/W4415434080","doi":"https://doi.org/10.1016/j.engappai.2025.112739","title":"Temporal diffuser: Timing scale-aware modulation for sign language production","display_name":"Temporal diffuser: Timing scale-aware modulation for sign language production","publication_year":2025,"publication_date":"2025-10-22","ids":{"openalex":"https://openalex.org/W4415434080","doi":"https://doi.org/10.1016/j.engappai.2025.112739"},"language":"en","primary_location":{"id":"doi:10.1016/j.engappai.2025.112739","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.engappai.2025.112739","pdf_url":null,"source":{"id":"https://openalex.org/S900972176","display_name":"Engineering Applications of Artificial Intelligence","issn_l":"0952-1976","issn":["0952-1976","1873-6769"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Engineering Applications of Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.1016/j.engappai.2025.112739","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5120093994","display_name":"Kim-Thuy Kha","orcid":null},"institutions":[{"id":"https://openalex.org/I141445968","display_name":"Ton Duc Thang University","ror":"https://ror.org/01drq0835","country_code":"VN","type":"education","lineage":["https://openalex.org/I141445968"]}],"countries":["VN"],"is_corresponding":false,"raw_author_name":"Kim-Thuy Kha","raw_affiliation_strings":["Faculty of Information Technology, Ton Duc Thang University, Ho Chi Minh City, Vietnam"],"affiliations":[{"raw_affiliation_string":"Faculty of Information Technology, Ton Duc Thang University, Ho Chi Minh City, Vietnam","institution_ids":["https://openalex.org/I141445968"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047569226","display_name":"Anh H. Vo","orcid":"https://orcid.org/0000-0002-7565-5736"},"institutions":[{"id":"https://openalex.org/I28777354","display_name":"Sejong University","ror":"https://ror.org/00aft1q37","country_code":"KR","type":"education","lineage":["https://openalex.org/I28777354"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Anh H. Vo","raw_affiliation_strings":["Department of Computer Engineering, Sejong University, Republic of Korea"],"affiliations":[{"raw_affiliation_string":"Department of Computer Engineering, Sejong University, Republic of Korea","institution_ids":["https://openalex.org/I28777354"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017407091","display_name":"Van-Vang Le","orcid":"https://orcid.org/0000-0002-2870-4240"},"institutions":[{"id":"https://openalex.org/I141445968","display_name":"Ton Duc Thang University","ror":"https://ror.org/01drq0835","country_code":"VN","type":"education","lineage":["https://openalex.org/I141445968"]}],"countries":["VN"],"is_corresponding":true,"raw_author_name":"Van-Vang Le","raw_affiliation_strings":["Faculty of Information Technology, Ton Duc Thang University, Ho Chi Minh City, Vietnam"],"affiliations":[{"raw_affiliation_string":"Faculty of Information Technology, Ton Duc Thang University, Ho Chi Minh City, Vietnam","institution_ids":["https://openalex.org/I141445968"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051310667","display_name":"Oh-Young Song","orcid":"https://orcid.org/0000-0002-7142-5976"},"institutions":[{"id":"https://openalex.org/I28777354","display_name":"Sejong University","ror":"https://ror.org/00aft1q37","country_code":"KR","type":"education","lineage":["https://openalex.org/I28777354"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Oh-Young Song","raw_affiliation_strings":["Department of Software, Sejong University, Republic of Korea"],"affiliations":[{"raw_affiliation_string":"Department of Software, Sejong University, Republic of Korea","institution_ids":["https://openalex.org/I28777354"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5064727471","display_name":"Yong-Guk Kim","orcid":"https://orcid.org/0000-0003-4645-1395"},"institutions":[{"id":"https://openalex.org/I28777354","display_name":"Sejong University","ror":"https://ror.org/00aft1q37","country_code":"KR","type":"education","lineage":["https://openalex.org/I28777354"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Yong-Guk Kim","raw_affiliation_strings":["Department of Computer Engineering, Sejong University, Republic of Korea"],"affiliations":[{"raw_affiliation_string":"Department of Computer Engineering, Sejong University, Republic of Korea","institution_ids":["https://openalex.org/I28777354"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5017407091","https://openalex.org/A5064727471"],"corresponding_institution_ids":["https://openalex.org/I141445968","https://openalex.org/I28777354"],"apc_list":{"value":3170,"currency":"USD","value_usd":3170},"apc_paid":{"value":3170,"currency":"USD","value_usd":3170},"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.29463399,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"163","issue":null,"first_page":"112739","last_page":"112739"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11398","display_name":"Hand Gesture Recognition Systems","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11398","display_name":"Hand Gesture Recognition Systems","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10914","display_name":"Tactile and Sensory Interactions","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9947999715805054,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/sign-language","display_name":"Sign language","score":0.522599995136261},{"id":"https://openalex.org/keywords/pooling","display_name":"Pooling","score":0.459199994802475},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.3953999876976013},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.39160001277923584},{"id":"https://openalex.org/keywords/fidelity","display_name":"Fidelity","score":0.3626999855041504},{"id":"https://openalex.org/keywords/autoregressive-model","display_name":"Autoregressive model","score":0.36010000109672546},{"id":"https://openalex.org/keywords/flexibility","display_name":"Flexibility (engineering)","score":0.36000001430511475},{"id":"https://openalex.org/keywords/modulation","display_name":"Modulation (music)","score":0.3573000133037567},{"id":"https://openalex.org/keywords/production","display_name":"Production (economics)","score":0.3549000024795532},{"id":"https://openalex.org/keywords/motion","display_name":"Motion (physics)","score":0.34130001068115234}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8955000042915344},{"id":"https://openalex.org/C522192633","wikidata":"https://www.wikidata.org/wiki/Q34228","display_name":"Sign language","level":2,"score":0.522599995136261},{"id":"https://openalex.org/C70437156","wikidata":"https://www.wikidata.org/wiki/Q7228652","display_name":"Pooling","level":2,"score":0.459199994802475},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4415000081062317},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4189999997615814},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.3953999876976013},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.39160001277923584},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.3626999855041504},{"id":"https://openalex.org/C159877910","wikidata":"https://www.wikidata.org/wiki/Q2202883","display_name":"Autoregressive model","level":2,"score":0.36010000109672546},{"id":"https://openalex.org/C2780598303","wikidata":"https://www.wikidata.org/wiki/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.36000001430511475},{"id":"https://openalex.org/C123079801","wikidata":"https://www.wikidata.org/wiki/Q750240","display_name":"Modulation (music)","level":2,"score":0.3573000133037567},{"id":"https://openalex.org/C2778348673","wikidata":"https://www.wikidata.org/wiki/Q739302","display_name":"Production (economics)","level":2,"score":0.3549000024795532},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.34130001068115234},{"id":"https://openalex.org/C139676723","wikidata":"https://www.wikidata.org/wiki/Q1193832","display_name":"Sign (mathematics)","level":2,"score":0.33489999175071716},{"id":"https://openalex.org/C205711294","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Rendering (computer graphics)","level":2,"score":0.3343000113964081},{"id":"https://openalex.org/C207347870","wikidata":"https://www.wikidata.org/wiki/Q371174","display_name":"Gesture","level":2,"score":0.33250001072883606},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.32420000433921814},{"id":"https://openalex.org/C39394851","wikidata":"https://www.wikidata.org/wiki/Q921594","display_name":"Inter frame","level":4,"score":0.31520000100135803},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.31299999356269836},{"id":"https://openalex.org/C10161872","wikidata":"https://www.wikidata.org/wiki/Q557891","display_name":"Motion estimation","level":2,"score":0.3093999922275543},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3028999865055084},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.29330000281333923},{"id":"https://openalex.org/C187691185","wikidata":"https://www.wikidata.org/wiki/Q2020720","display_name":"Grid","level":2,"score":0.2851000130176544},{"id":"https://openalex.org/C2776737515","wikidata":"https://www.wikidata.org/wiki/Q14759","display_name":"American Sign Language","level":3,"score":0.28349998593330383},{"id":"https://openalex.org/C2777708103","wikidata":"https://www.wikidata.org/wiki/Q852589","display_name":"Motion blur","level":3,"score":0.2782000005245209},{"id":"https://openalex.org/C2777402240","wikidata":"https://www.wikidata.org/wiki/Q6783436","display_name":"Masking (illustration)","level":2,"score":0.274399995803833},{"id":"https://openalex.org/C77277458","wikidata":"https://www.wikidata.org/wiki/Q1969246","display_name":"Temporal database","level":2,"score":0.2703000009059906},{"id":"https://openalex.org/C3261483","wikidata":"https://www.wikidata.org/wiki/Q119565","display_name":"Frame rate","level":2,"score":0.26260000467300415},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.26109999418258667},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.2542000114917755},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.2515999972820282}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1016/j.engappai.2025.112739","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.engappai.2025.112739","pdf_url":null,"source":{"id":"https://openalex.org/S900972176","display_name":"Engineering Applications of Artificial Intelligence","issn_l":"0952-1976","issn":["0952-1976","1873-6769"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Engineering Applications of Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1016/j.engappai.2025.112739","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.engappai.2025.112739","pdf_url":null,"source":{"id":"https://openalex.org/S900972176","display_name":"Engineering Applications of Artificial Intelligence","issn_l":"0952-1976","issn":["0952-1976","1873-6769"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Engineering Applications of Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1458670704","display_name":null,"funder_award_id":"2024-0-00037","funder_id":"https://openalex.org/F4320330380","funder_display_name":"Information Technology Research Centre"},{"id":"https://openalex.org/G1479188259","display_name":null,"funder_award_id":"2020R1A6A1A03038540","funder_id":"https://openalex.org/F4320321408","funder_display_name":"Ministry of Education"},{"id":"https://openalex.org/G2326592593","display_name":null,"funder_award_id":"IITP-2022-RS-2022-00156354","funder_id":"https://openalex.org/F4320321287","funder_display_name":"Sejong University"},{"id":"https://openalex.org/G4759061179","display_name":null,"funder_award_id":"IITP-2025-RS-2022-00156354","funder_id":"https://openalex.org/F4320330380","funder_display_name":"Information Technology Research Centre"},{"id":"https://openalex.org/G6155231739","display_name":null,"funder_award_id":"IITP-2025-RS-2023-00254529","funder_id":"https://openalex.org/F4320330380","funder_display_name":"Information Technology Research Centre"},{"id":"https://openalex.org/G823004890","display_name":null,"funder_award_id":"RS-2019-II190231","funder_id":"https://openalex.org/F4320328359","funder_display_name":"Ministry of Science and ICT, South Korea"}],"funders":[{"id":"https://openalex.org/F4320321287","display_name":"Sejong University","ror":"https://ror.org/00aft1q37"},{"id":"https://openalex.org/F4320321408","display_name":"Ministry of Education","ror":"https://ror.org/01p262204"},{"id":"https://openalex.org/F4320322120","display_name":"National Research Foundation of Korea","ror":"https://ror.org/013aysd81"},{"id":"https://openalex.org/F4320328359","display_name":"Ministry of Science and ICT, South Korea","ror":"https://ror.org/01wpjm123"},{"id":"https://openalex.org/F4320330380","display_name":"Information Technology Research Centre","ror":null},{"id":"https://openalex.org/F4320335489","display_name":"Institute for Information and Communications Technology Promotion","ror":"https://ror.org/01g0hqq23"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":45,"referenced_works":["https://openalex.org/W1906868929","https://openalex.org/W2250689755","https://openalex.org/W2607303097","https://openalex.org/W2962730651","https://openalex.org/W2963608065","https://openalex.org/W2966937616","https://openalex.org/W2978956737","https://openalex.org/W2997573805","https://openalex.org/W3017116930","https://openalex.org/W3036167779","https://openalex.org/W3121370741","https://openalex.org/W3161569725","https://openalex.org/W3168702146","https://openalex.org/W3206301866","https://openalex.org/W4200554106","https://openalex.org/W4230527737","https://openalex.org/W4283642383","https://openalex.org/W4285306484","https://openalex.org/W4288099666","https://openalex.org/W4292379757","https://openalex.org/W4320853786","https://openalex.org/W4368754780","https://openalex.org/W4377195271","https://openalex.org/W4385571029","https://openalex.org/W4385605307","https://openalex.org/W4386076575","https://openalex.org/W4386566683","https://openalex.org/W4387319009","https://openalex.org/W4390872326","https://openalex.org/W4390872693","https://openalex.org/W4393148254","https://openalex.org/W4402660144","https://openalex.org/W4402672102","https://openalex.org/W4402683877","https://openalex.org/W4402727798","https://openalex.org/W4402753616","https://openalex.org/W4403918578","https://openalex.org/W4404781759","https://openalex.org/W4404965692","https://openalex.org/W4406036184","https://openalex.org/W4409985895","https://openalex.org/W4411244490","https://openalex.org/W4411245378","https://openalex.org/W4411350223","https://openalex.org/W4412792210"],"related_works":[],"abstract_inverted_index":{"Recent":[0],"advances":[1],"in":[2,76],"Sign":[3,59],"Language":[4,60],"Production":[5,61],"(SLP)":[6],"highlight":[7],"denoising":[8],"diffusion":[9],"models":[10,45],"as":[11],"promising":[12],"alternatives":[13],"to":[14,47,109],"traditional":[15],"autoregressive":[16],"methods.":[17],"Most":[18],"existing":[19],"approaches":[20],"follow":[21],"a":[22,66,87,110,114,128],"two-stage":[23],"pipeline":[24],"that":[25,72,90,133,148],"encodes":[26],"sign":[27,101],"motion":[28,75,140],"into":[29],"discrete":[30],"latent":[31],"codes,":[32],"often":[33,51],"sacrificing":[34],"Space\u2013Time":[35,88],"fidelity":[36],"and":[37,100,145,155,160],"requiring":[38],"gloss":[39],"annotations":[40],"or":[41,113],"complex":[42],"codebooks.":[43],"Transformer-based":[44],"aim":[46],"simplify":[48],"this,":[49],"but":[50],"produce":[52],"overly":[53],"smooth,":[54],"unnatural":[55],"motions.":[56],"We":[57],"introduce":[58],"with":[62],"Scale-Aware":[63,130],"Modulation":[64,131],"(SignSAM),":[65],"novel":[67],"single-stage,":[68],"gloss-free":[69],"SLP":[70],"framework":[71],"directly":[73],"synthesizes":[74],"continuous":[77],"space,":[78],"preserving":[79],"fine":[80],"temporal":[81,93,124,136],"details.":[82],"At":[83],"its":[84],"core":[85],"is":[86,168],"U-Net":[89],"learns":[91],"compact":[92],"features":[94],"by":[95],"jointly":[96],"downscaling":[97],"the":[98],"frame":[99],"feature":[102],"dimensions,":[103],"thereby":[104],"reducing":[105],"computational":[106],"cost":[107],"compared":[108],"no-pyramid":[111],"UNet":[112,116],"pyramid":[115],"without":[117],"consistency":[118],"between":[119],"dimensions.":[120],"To":[121],"further":[122],"enhance":[123],"precision,":[125],"we":[126],"propose":[127],"Timing":[129],"module":[132],"fuses":[134],"multiscale":[135],"resolutions":[137],"for":[138,163],"better":[139],"coherence.":[141],"Experiments":[142],"on":[143],"PHOENIX14T":[144],"How2Sign":[146],"show":[147],"SignSAM":[149],"achieves":[150],"state-of-the-art":[151],"(SOTA)":[152],"fluency,":[153],"accuracy,":[154],"naturalness,":[156],"offering":[157],"an":[158],"efficient":[159],"expressive":[161],"solution":[162],"SLP.":[164],"Our":[165],"project":[166],"homepage":[167],"https://kha-kim-thuy.github.io/SLP-Demo/":[169],".":[170]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-24T00:00:00"}
