{"id":"https://openalex.org/W4413018528","doi":"https://doi.org/10.1109/fg61629.2025.11099295","title":"OASIS: Object-guided Attention for Text-conditional Diffusion Synthesis of Human Interaction Sequences","display_name":"OASIS: Object-guided Attention for Text-conditional Diffusion Synthesis of Human Interaction Sequences","publication_year":2025,"publication_date":"2025-05-26","ids":{"openalex":"https://openalex.org/W4413018528","doi":"https://doi.org/10.1109/fg61629.2025.11099295"},"language":"en","primary_location":{"id":"doi:10.1109/fg61629.2025.11099295","is_oa":false,"landing_page_url":"https://doi.org/10.1109/fg61629.2025.11099295","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE 19th International Conference on Automatic Face and Gesture Recognition (FG)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100838868","display_name":"Chih-Chun Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Chih-Chun Yang","raw_affiliation_strings":["Carnegie Mellon University,USA"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University,USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086254443","display_name":"Tianhui Cai","orcid":null},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tianhui Cai","raw_affiliation_strings":["Carnegie Mellon University,USA"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University,USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086118610","display_name":"Zolt\u00e1n \u00c1. Milacski","orcid":"https://orcid.org/0000-0002-3135-2936"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zolt\u00e1n Milacski","raw_affiliation_strings":["Carnegie Mellon University,USA"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University,USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112285262","display_name":"Aayush Prakash","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Aayush Prakash","raw_affiliation_strings":["Meta,USA"],"affiliations":[{"raw_affiliation_string":"Meta,USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013460294","display_name":"Shingo Takagi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shingo Takagi","raw_affiliation_strings":["Meta,USA"],"affiliations":[{"raw_affiliation_string":"Meta,USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103065489","display_name":"Daeil Kim","orcid":"https://orcid.org/0000-0002-2708-6049"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Daeil Kim","raw_affiliation_strings":["Meta,USA"],"affiliations":[{"raw_affiliation_string":"Meta,USA","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5000157334","display_name":"Fernando De La Torre","orcid":"https://orcid.org/0000-0002-7086-8572"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Fernando De La Torre","raw_affiliation_strings":["Carnegie Mellon University,USA"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University,USA","institution_ids":["https://openalex.org/I74973139"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5100838868"],"corresponding_institution_ids":["https://openalex.org/I74973139"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.11313017,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"9"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9878000020980835,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9878000020980835,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9868000149726868,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9678999781608582,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.62978196144104},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.5963977575302124},{"id":"https://openalex.org/keywords/diffusion","display_name":"Diffusion","score":0.5627423524856567},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4118833839893341},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3858070969581604},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.34250324964523315},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.32601678371429443},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.11960950493812561}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.62978196144104},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.5963977575302124},{"id":"https://openalex.org/C69357855","wikidata":"https://www.wikidata.org/wiki/Q163214","display_name":"Diffusion","level":2,"score":0.5627423524856567},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4118833839893341},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3858070969581604},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.34250324964523315},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.32601678371429443},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.11960950493812561},{"id":"https://openalex.org/C97355855","wikidata":"https://www.wikidata.org/wiki/Q11473","display_name":"Thermodynamics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/fg61629.2025.11099295","is_oa":false,"landing_page_url":"https://doi.org/10.1109/fg61629.2025.11099295","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE 19th International Conference on Automatic Face and Gesture Recognition (FG)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":40,"referenced_works":["https://openalex.org/W2752796333","https://openalex.org/W2968722025","https://openalex.org/W2971856312","https://openalex.org/W2982625143","https://openalex.org/W3003177118","https://openalex.org/W3034479523","https://openalex.org/W3034681090","https://openalex.org/W3081268564","https://openalex.org/W3153832461","https://openalex.org/W3176252609","https://openalex.org/W3180172155","https://openalex.org/W3185319321","https://openalex.org/W3202782837","https://openalex.org/W4286743373","https://openalex.org/W4288079574","https://openalex.org/W4297981470","https://openalex.org/W4297989763","https://openalex.org/W4312242013","https://openalex.org/W4312457231","https://openalex.org/W4312544766","https://openalex.org/W4312635677","https://openalex.org/W4313145975","https://openalex.org/W4378084859","https://openalex.org/W4385764497","https://openalex.org/W4386071605","https://openalex.org/W4386076288","https://openalex.org/W4390872595","https://openalex.org/W4390873054","https://openalex.org/W4391305822","https://openalex.org/W4394625728","https://openalex.org/W4402703045","https://openalex.org/W4402772414","https://openalex.org/W4404722747","https://openalex.org/W6687045409","https://openalex.org/W6763422710","https://openalex.org/W6765779288","https://openalex.org/W6779823529","https://openalex.org/W6791353385","https://openalex.org/W6809885388","https://openalex.org/W6844223692"],"related_works":["https://openalex.org/W2737719445","https://openalex.org/W2898210368","https://openalex.org/W4239098401","https://openalex.org/W2382480268","https://openalex.org/W1976518449","https://openalex.org/W2732837990","https://openalex.org/W2363366881","https://openalex.org/W4206198161","https://openalex.org/W2363276194","https://openalex.org/W4285122238"],"abstract_inverted_index":{"Analyzing":[0],"and":[1,22,35,108,129,150,169,178,182],"synthesizing":[2],"human-object":[3],"interaction":[4,73,152],"is":[5],"crucial":[6],"for":[7,120,174,187],"advancing":[8],"intelligent":[9],"systems":[10],"that":[11,56],"engage":[12],"with":[13,163],"the":[14,43,66,78,81,97,103,109,127,145,157],"physical":[15],"environment.":[16],"However,":[17],"simultaneous":[18],"tracking":[19],"of":[20,46,69,80,131,165],"human":[21,60,98],"object":[23,105],"data":[24],"presents":[25],"inherent":[26],"challenges,":[27],"resulting":[28],"in":[29,31,134,148,167,172,180,185],"limitations":[30],"dataset":[32],"scale,":[33],"diversity,":[34],"annotation":[36],"quality":[37],"within":[38],"this":[39],"domain,":[40],"thereby":[41],"hindering":[42],"generalization":[44],"ability":[45],"trained":[47],"models.":[48],"This":[49],"study":[50],"introduces":[51],"OASIS,":[52],"a":[53,117],"novel":[54],"framework":[55,133],"extends":[57],"pretrained":[58,82],"text-conditional":[59],"motion":[61,83,99,106,176,189],"diffusion":[62,84],"models":[63],"to":[64,95,101,156],"address":[65],"complex":[67],"task":[68],"fullbody":[70],"3D":[71],"hand-object":[72],"generation.":[74],"Specifically,":[75],"we":[76,93,125],"freeze":[77],"parameters":[79],"model,":[85],"while":[86],"incorporating":[87],"additional":[88],"object-guided":[89],"attention":[90],"layers,":[91],"which":[92],"train":[94],"adapt":[96],"latents":[100],"match":[102],"input":[104],"sequence":[107],"text.":[110],"Our":[111,142],"method":[112,143,160],"can":[113],"be":[114],"understood":[115],"as":[116],"ControlNet":[118],"[38]":[119],"interaction.":[121],"Through":[122],"extensive":[123],"experimentation,":[124],"demonstrate":[126],"effectiveness":[128],"robustness":[130],"our":[132],"generating":[135],"realistic":[136],"handobject":[137],"interactions":[138],"from":[139],"textual":[140],"descriptions.":[141],"surpasses":[144],"state-of-the-art":[146],"performance":[147],"FID":[149,168,181],"accuracy":[151,173,186],"fidelity":[153],"metrics":[154],"compared":[155],"prior":[158],"best":[159],"IMoS":[161],"[10],":[162],"improvements":[164],"0.08":[166],"$2":[170],"\\%$":[171,184],"body":[175],"synthesis,":[177],"0.15":[179],"$10":[183],"hand":[188],"synthesis.":[190]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
