{"id":"https://openalex.org/W4404307861","doi":"https://doi.org/10.48550/arxiv.2410.18977","title":"Pay Attention and Move Better: Harnessing Attention for Interactive Motion Generation and Training-free Editing","display_name":"Pay Attention and Move Better: Harnessing Attention for Interactive Motion Generation and Training-free Editing","publication_year":2024,"publication_date":"2024-10-24","ids":{"openalex":"https://openalex.org/W4404307861","doi":"https://doi.org/10.48550/arxiv.2410.18977"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2410.18977","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.18977","pdf_url":"https://arxiv.org/pdf/2410.18977","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2410.18977","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103263210","display_name":"Ling-Hao Chen","orcid":"https://orcid.org/0000-0002-2528-6178"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Chen, Ling-Hao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066974590","display_name":"Wenxun Dai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lu, Shunlin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029738173","display_name":"Xuan Ju","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dai, Wenxun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028455158","display_name":"Shunlin Lu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dou, Zhiyang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100433899","display_name":"Lei Zhang","orcid":"https://orcid.org/0000-0002-2078-4215"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ju, Xuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Wang, Jingbo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Jingbo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Komura, Taku","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Komura, Taku","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Zhang, Lei","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Lei","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5103263210"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9887999892234802,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9887999892234802,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9855999946594238,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.9837999939918518,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.7136156558990479},{"id":"https://openalex.org/keywords/motion","display_name":"Motion (physics)","score":0.6337788701057434},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.579738974571228},{"id":"https://openalex.org/keywords/cognitive-psychology","display_name":"Cognitive psychology","score":0.39866316318511963},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.37349772453308105},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.2938218414783478},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2828209698200226},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.06677433848381042}],"concepts":[{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.7136156558990479},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.6337788701057434},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.579738974571228},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.39866316318511963},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.37349772453308105},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.2938218414783478},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2828209698200226},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.06677433848381042},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2410.18977","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.18977","pdf_url":"https://arxiv.org/pdf/2410.18977","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2410.18977","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2410.18977","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2410.18977","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.18977","pdf_url":"https://arxiv.org/pdf/2410.18977","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W230091440","https://openalex.org/W2390279801","https://openalex.org/W2233261550","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2810751659"],"abstract_inverted_index":{"This":[0],"research":[1],"delves":[2],"into":[3],"the":[4,21,56,68,74,81,88,94,100,104,144,147,153],"problem":[5],"of":[6,9,20,50,83,117,143,146,155],"interactive":[7],"editing":[8,32,122,176],"human":[10],"motion":[11,14,42,84,105,121,130,133,137,159],"generation.":[12],"Previous":[13],"diffusion":[15,43],"models":[16,55],"lack":[17],"explicit":[18],"modeling":[19,49],"word-level":[22],"text-motion":[23],"correspondence":[24,97],"and":[25,58,63,79,98,135,157,175],"good":[26,173,179],"explainability,":[27],"hence":[28],"restricting":[29],"their":[30],"fine-grained":[31,95],"ability.":[33],"To":[34],"address":[35],"this":[36],"issue,":[37],"we":[38,112,150],"propose":[39],"an":[40],"attention-based":[41],"model,":[44],"namely":[45],"MotionCLR,":[46],"with":[47,61,178],"CLeaR":[48],"attention":[51,126,148,163],"mechanisms.":[52],"Technically,":[53],"MotionCLR":[54],"in-modality":[57],"cross-modality":[59],"interactions":[60],"self-attention":[62,69],"cross-attention,":[64],"respectively.":[65],"More":[66],"specifically,":[67],"mechanism":[70,90],"aims":[71],"to":[72,92],"measure":[73],"sequential":[75],"similarity":[76],"between":[77],"frames":[78],"impacts":[80],"order":[82],"features.":[85],"By":[86],"contrast,":[87],"cross-attention":[89],"works":[91],"find":[93],"word-sequence":[96],"activate":[99],"corresponding":[101],"timesteps":[102],"in":[103],"sequence.":[106],"Based":[107],"on":[108],"these":[109],"key":[110],"properties,":[111],"develop":[113],"a":[114],"versatile":[115],"set":[116],"simple":[118],"yet":[119],"effective":[120],"methods":[123],"via":[124,162],"manipulating":[125],"maps,":[127],"such":[128],"as":[129],"(de-)emphasizing,":[131],"in-place":[132],"replacement,":[134],"example-based":[136],"generation,":[138],"etc.":[139],"For":[140],"further":[141],"verification":[142],"explainability":[145],"mechanism,":[149],"additionally":[151],"explore":[152],"potential":[154],"action-counting":[156],"grounded":[158],"generation":[160,174],"ability":[161,177],"maps.":[164],"Our":[165],"experimental":[166],"results":[167],"show":[168],"that":[169],"our":[170],"method":[171],"enjoys":[172],"explainability.":[180]},"counts_by_year":[],"updated_date":"2026-03-10T16:38:18.471706","created_date":"2024-11-13T00:00:00"}
