{"id":"https://openalex.org/W4372341521","doi":"https://doi.org/10.1109/icassp49357.2023.10096441","title":"Diffusion Motion: Generate Text-Guided 3D Human Motion by Diffusion Model","display_name":"Diffusion Motion: Generate Text-Guided 3D Human Motion by Diffusion Model","publication_year":2023,"publication_date":"2023-05-05","ids":{"openalex":"https://openalex.org/W4372341521","doi":"https://doi.org/10.1109/icassp49357.2023.10096441"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49357.2023.10096441","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10096441","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5002534655","display_name":"Zhiyuan Ren","orcid":"https://orcid.org/0000-0003-4560-5102"},"institutions":[{"id":"https://openalex.org/I87216513","display_name":"Michigan State University","ror":"https://ror.org/05hs6h993","country_code":"US","type":"education","lineage":["https://openalex.org/I87216513"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Zhiyuan Ren","raw_affiliation_strings":["Michigan State University,East Lansing,MI,USA,48824"],"affiliations":[{"raw_affiliation_string":"Michigan State University,East Lansing,MI,USA,48824","institution_ids":["https://openalex.org/I87216513"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014233468","display_name":"Zhihong Pan","orcid":"https://orcid.org/0000-0003-0866-762X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhihong Pan","raw_affiliation_strings":["Baidu Research (USA),Sunnyvale,CA,USA,94089"],"affiliations":[{"raw_affiliation_string":"Baidu Research (USA),Sunnyvale,CA,USA,94089","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065917738","display_name":"Xin Zhou","orcid":"https://orcid.org/0000-0003-4015-4787"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xin Zhou","raw_affiliation_strings":["Baidu Research (USA),Sunnyvale,CA,USA,94089"],"affiliations":[{"raw_affiliation_string":"Baidu Research (USA),Sunnyvale,CA,USA,94089","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5049091907","display_name":"Le Kang","orcid":"https://orcid.org/0000-0003-4262-2329"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Le Kang","raw_affiliation_strings":["Baidu Research (USA),Sunnyvale,CA,USA,94089"],"affiliations":[{"raw_affiliation_string":"Baidu Research (USA),Sunnyvale,CA,USA,94089","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5002534655"],"corresponding_institution_ids":["https://openalex.org/I87216513"],"apc_list":null,"apc_paid":null,"fwci":3.6845,"has_fulltext":false,"cited_by_count":30,"citation_normalized_percentile":{"value":0.94671756,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7458326816558838},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5850372314453125},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.5581082701683044},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.5489563941955566},{"id":"https://openalex.org/keywords/motion","display_name":"Motion (physics)","score":0.542086660861969},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.5353639125823975},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.4980175495147705},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.4405350685119629},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.4144819676876068},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.3955855965614319},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3326488733291626},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3304026126861572}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7458326816558838},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5850372314453125},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.5581082701683044},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.5489563941955566},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.542086660861969},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.5353639125823975},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.4980175495147705},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.4405350685119629},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4144819676876068},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.3955855965614319},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3326488733291626},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3304026126861572}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49357.2023.10096441","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10096441","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.6700000166893005,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W2474702929","https://openalex.org/W2769102608","https://openalex.org/W2896457183","https://openalex.org/W2897492344","https://openalex.org/W2901994722","https://openalex.org/W2963092440","https://openalex.org/W2971856312","https://openalex.org/W2982625143","https://openalex.org/W3036167779","https://openalex.org/W3102937540","https://openalex.org/W3121370741","https://openalex.org/W3124346429","https://openalex.org/W3125772723","https://openalex.org/W3129651364","https://openalex.org/W3144253442","https://openalex.org/W3153832461","https://openalex.org/W3162926177","https://openalex.org/W3168053944","https://openalex.org/W3180196270","https://openalex.org/W3181695292","https://openalex.org/W4224035735","https://openalex.org/W4288079574","https://openalex.org/W4288099666","https://openalex.org/W4312635677","https://openalex.org/W6755207826","https://openalex.org/W6779823529","https://openalex.org/W6783182287","https://openalex.org/W6783713337","https://openalex.org/W6788990321","https://openalex.org/W6795288823","https://openalex.org/W6809885388","https://openalex.org/W6840815571"],"related_works":["https://openalex.org/W2687972263","https://openalex.org/W2029249305","https://openalex.org/W2511137960","https://openalex.org/W3214088465","https://openalex.org/W2115571026","https://openalex.org/W2604231787","https://openalex.org/W2610014769","https://openalex.org/W4321142835","https://openalex.org/W3084370450","https://openalex.org/W2483420468"],"abstract_inverted_index":{"We":[0,133],"propose":[1],"a":[2,68,72,83],"simple":[3],"and":[4,22,77,124,130],"novel":[5],"method":[6],"for":[7,148],"generating":[8],"3D":[9,65],"human":[10],"motion":[11,50,66],"from":[12,30],"complex":[13],"natural":[14,129],"language":[15],"sentences,":[16],"which":[17],"describe":[18],"different":[19],"velocity,":[20],"direction":[21],"composition":[23],"of":[24,27,55,74,91,143,146],"all":[25],"kinds":[26],"actions.":[28],"Different":[29],"existing":[31],"methods":[32],"that":[33,113,138],"use":[34,96],"classical":[35],"generative":[36],"architecture,":[37],"we":[38,95],"apply":[39],"the":[40,53,89,97,106],"Denoising":[41],"Diffusion":[42],"Probabilistic":[43],"Model":[44],"to":[45,101],"this":[46],"task,":[47],"synthesizing":[48],"diverse":[49,131],"results":[51,118],"under":[52],"guidance":[54,99],"texts.":[56],"The":[57],"diffusion":[58],"model":[59,107,115,140],"converts":[60],"white":[61],"noise":[62],"into":[63,105],"structured":[64],"by":[67,81],"Markov":[69],"process":[70],"with":[71,136],"series":[73],"denoising":[75],"steps":[76],"is":[78,141],"efficiently":[79],"trained":[80],"optimizing":[82],"variational":[84],"lower":[85],"bound.":[86],"To":[87],"achieve":[88],"goal":[90],"text-conditioned":[92],"image":[93],"synthesis,":[94],"classifier-free":[98],"strategy":[100],"add":[102],"text":[103,150],"embedding":[104],"during":[108],"training.":[109],"Our":[110],"experiments":[111,137],"demonstrate":[112],"our":[114,139],"achieves":[116],"competitive":[117],"on":[119],"HumanML3D":[120],"test":[121],"set":[122],"quantitatively":[123],"can":[125],"generate":[126],"more":[127],"visually":[128],"examples.":[132],"also":[134],"show":[135],"capable":[142],"zero-shot":[144],"generation":[145],"motions":[147],"unseen":[149],"guidance.":[151]},"counts_by_year":[{"year":2025,"cited_by_count":9},{"year":2024,"cited_by_count":18},{"year":2023,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
