{"id":"https://openalex.org/W7138041878","doi":"https://doi.org/10.1609/aaai.v40i14.38156","title":"MMG-VL: A Vision-Language Driven Approach for Multi-Person Motion Generation","display_name":"MMG-VL: A Vision-Language Driven Approach for Multi-Person Motion Generation","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7138041878","doi":"https://doi.org/10.1609/aaai.v40i14.38156"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i14.38156","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i14.38156","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/38156/42118","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/38156/42118","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129653310","display_name":"Songyuan Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Songyuan Yang","raw_affiliation_strings":["National University of Defense Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National University of Defense Technology","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129665508","display_name":"Wanrong Huang","orcid":null},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wanrong Huang","raw_affiliation_strings":["National University of Defense Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National University of Defense Technology","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129710690","display_name":"Yinuo Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yinuo Liu","raw_affiliation_strings":["National University of Defense Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National University of Defense Technology","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034265164","display_name":"Zhang Ke-di","orcid":null},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhang Ke-Di","raw_affiliation_strings":["National University of Defense Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National University of Defense Technology","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128707716","display_name":"Xihuai He","orcid":null},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xihuai He","raw_affiliation_strings":["National University of Defense Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National University of Defense Technology","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108316314","display_name":"Shaowu Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shaowu Yang","raw_affiliation_strings":["National University of Defense Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National University of Defense Technology","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5004941473","display_name":"Huibin Tan","orcid":"https://orcid.org/0000-0003-4060-8793"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huibin Tan","raw_affiliation_strings":["National University of Defense Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National University of Defense Technology","institution_ids":["https://openalex.org/I170215575"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5129653310"],"corresponding_institution_ids":["https://openalex.org/I170215575"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.27642276,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"14","first_page":"11712","last_page":"11720"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.9108999967575073,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.9108999967575073,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.03449999913573265,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10719","display_name":"3D Shape Modeling and Analysis","score":0.021800000220537186,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/motion","display_name":"Motion (physics)","score":0.6467000246047974},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.6134999990463257},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5936999917030334},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.5741999745368958},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5507000088691711},{"id":"https://openalex.org/keywords/motion-capture","display_name":"Motion capture","score":0.44600000977516174},{"id":"https://openalex.org/keywords/motion-planning","display_name":"Motion planning","score":0.42750000953674316},{"id":"https://openalex.org/keywords/planner","display_name":"Planner","score":0.4016999900341034},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.38100001215934753}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7480999827384949},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.6467000246047974},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.6134999990463257},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5936999917030334},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.5741999745368958},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5507000088691711},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5293999910354614},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.49160000681877136},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4675000011920929},{"id":"https://openalex.org/C48007421","wikidata":"https://www.wikidata.org/wiki/Q676252","display_name":"Motion capture","level":3,"score":0.44600000977516174},{"id":"https://openalex.org/C81074085","wikidata":"https://www.wikidata.org/wiki/Q366872","display_name":"Motion planning","level":3,"score":0.42750000953674316},{"id":"https://openalex.org/C2776999362","wikidata":"https://www.wikidata.org/wiki/Q2349274","display_name":"Planner","level":2,"score":0.4016999900341034},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.38100001215934753},{"id":"https://openalex.org/C145912823","wikidata":"https://www.wikidata.org/wiki/Q113558","display_name":"Dynamics (music)","level":2,"score":0.3693999946117401},{"id":"https://openalex.org/C189645446","wikidata":"https://www.wikidata.org/wiki/Q350865","display_name":"Mirroring","level":2,"score":0.3560999929904938},{"id":"https://openalex.org/C194969405","wikidata":"https://www.wikidata.org/wiki/Q170519","display_name":"Virtual reality","level":2,"score":0.3531999886035919},{"id":"https://openalex.org/C205711294","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Rendering (computer graphics)","level":2,"score":0.32919999957084656},{"id":"https://openalex.org/C2778770139","wikidata":"https://www.wikidata.org/wiki/Q1966904","display_name":"Solver","level":2,"score":0.32199999690055847},{"id":"https://openalex.org/C90697248","wikidata":"https://www.wikidata.org/wiki/Q1062896","display_name":"Character animation","level":4,"score":0.3156000077724457},{"id":"https://openalex.org/C25344961","wikidata":"https://www.wikidata.org/wiki/Q192726","display_name":"Virtual machine","level":2,"score":0.30630001425743103},{"id":"https://openalex.org/C166109690","wikidata":"https://www.wikidata.org/wiki/Q4677422","display_name":"Action selection","level":3,"score":0.28139999508857727},{"id":"https://openalex.org/C193519340","wikidata":"https://www.wikidata.org/wiki/Q891179","display_name":"Data loss","level":2,"score":0.27070000767707825},{"id":"https://openalex.org/C502989409","wikidata":"https://www.wikidata.org/wiki/Q11425","display_name":"Animation","level":2,"score":0.2685999870300293},{"id":"https://openalex.org/C44210515","wikidata":"https://www.wikidata.org/wiki/Q16968978","display_name":"Bespoke","level":2,"score":0.26809999346733093},{"id":"https://openalex.org/C2776608160","wikidata":"https://www.wikidata.org/wiki/Q4785462","display_name":"Natural (archaeology)","level":2,"score":0.2655999958515167},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.2621999979019165},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.257999986410141},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.25679999589920044},{"id":"https://openalex.org/C2780615836","wikidata":"https://www.wikidata.org/wiki/Q2471869","display_name":"USable","level":2,"score":0.2565999925136566},{"id":"https://openalex.org/C155911762","wikidata":"https://www.wikidata.org/wiki/Q422321","display_name":"Blueprint","level":2,"score":0.25459998846054077}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i14.38156","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i14.38156","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/38156/42118","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i14.38156","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i14.38156","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/38156/42118","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320324150","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11"},{"id":"https://openalex.org/F4320335581","display_name":"Young Scientists Fund","ror":null}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7138041878.pdf","grobid_xml":"https://content.openalex.org/works/W7138041878.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Generating":[0],"realistic":[1],"and":[2,40,54,75,91,144,154,184,202,212],"coordinated":[3,63,203],"3D":[4,38,115,182],"human":[5],"motion":[6,116],"for":[7,209],"multiple":[8],"individuals":[9],"within":[10],"complex":[11],"environments":[12],"remains":[13],"a":[14,58,71,81,97,105,130,145],"significant":[15],"challenge.":[16],"Existing":[17],"text-to-motion":[18],"methods":[19,195],"are":[20],"often":[21],"``blind''":[22],"to":[23,28,86,134],"the":[24,51,67,88,93,136,167,207],"physical":[25,152],"scene,":[26],"leading":[27],"implausible":[29],"motions,":[30,183,205],"while":[31],"scene-conditioned":[32],"(HSI)":[33],"approaches":[34],"demand":[35],"cumbersome":[36],"full":[37],"data":[39],"largely":[41],"neglect":[42],"multi-person":[43,64,101,172,204],"dynamics.":[44],"To":[45,160],"address":[46],"these":[47,111,121],"limitations,":[48],"we":[49,164],"introduce":[50,165],"VL2Motion":[52],"paradigm":[53],"its":[55],"embodiment,":[56],"MMG-VL,":[57],"hierarchical":[59],"framework":[60],"that":[61,150,190],"generates":[62],"motions":[65],"from":[66],"most":[68],"accessible":[69],"inputs:":[70],"single":[72],"2D":[73],"image":[74],"natural":[76],"language.":[77],"MMG-VL":[78,191],"first":[79,168],"employs":[80],"Scene-Aware":[82],"Intent":[83],"Planner":[84],"(SAIP)":[85],"interpret":[87],"visual":[89,142],"context":[90],"decompose":[92],"user's":[94],"command":[95],"into":[96,113],"set":[98],"of":[99,215],"spatially-grounded,":[100],"action":[102],"blueprints.":[103],"Subsequently,":[104],"Coordinated":[106,146],"Motion":[107],"Synthesizer":[108],"(CMS)":[109],"translates":[110],"blueprints":[112],"high-fidelity":[114],"sequences.":[117],"The":[118],"synergy":[119],"between":[120],"stages":[122],"is":[123,139],"driven":[124],"by":[125],"two":[126],"novel":[127],"loss":[128],"functions:":[129],"Spatial-Semantic":[131],"Grounding":[132],"Loss":[133,149],"ensure":[135],"planner's":[137],"output":[138],"grounded":[140],"in":[141,174,196],"reality,":[143],"Environmental":[147],"Realism":[148],"enforces":[151],"constraints":[153],"coherent":[155],"group":[156],"dynamics":[157],"during":[158],"synthesis.":[159],"facilitate":[161],"this":[162],"research,":[163],"HumanVL,":[166],"large-scale":[169],"dataset":[170],"featuring":[171],"activities":[173],"multi-room":[175],"scenes,":[176],"providing":[177],"aligned":[178],"images,":[179],"text,":[180],"blueprints,":[181],"scene":[185],"geometry.":[186],"Extensive":[187],"experiments":[188],"demonstrate":[189],"significantly":[192],"outperforms":[193],"existing":[194],"generating":[197],"spatially":[198],"coherent,":[199],"physically":[200],"realistic,":[201],"paving":[206],"way":[208],"more":[210],"scalable":[211],"intuitive":[213],"creation":[214],"dynamic":[216],"virtual":[217],"worlds.":[218]},"counts_by_year":[],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2026-03-18T00:00:00"}
