{"id":"https://openalex.org/W7162490638","doi":"https://doi.org/10.1109/3dv69130.2026.00077","title":"InterPose: Learning to Generate Human-Object Interactions from Large-Scale Web Videos","display_name":"InterPose: Learning to Generate Human-Object Interactions from Large-Scale Web Videos","publication_year":2026,"publication_date":"2026-03-20","ids":{"openalex":"https://openalex.org/W7162490638","doi":"https://doi.org/10.1109/3dv69130.2026.00077"},"language":null,"primary_location":{"id":"doi:10.1109/3dv69130.2026.00077","is_oa":false,"landing_page_url":"https://doi.org/10.1109/3dv69130.2026.00077","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 International Conference on 3D Vision (3DV)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5036187685","display_name":"Yangsong Zhang","orcid":"https://orcid.org/0000-0002-6764-3567"},"institutions":[{"id":"https://openalex.org/I4210113480","display_name":"Mohamed bin Zayed University of Artificial Intelligence","ror":"https://ror.org/0258gkt32","country_code":"AE","type":"education","lineage":["https://openalex.org/I4210113480"]}],"countries":["AE"],"is_corresponding":false,"raw_author_name":"Yangsong Zhang","raw_affiliation_strings":["Mohamed Bin Zayed University of Artificial Intelligence (MBZUAI)"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Mohamed Bin Zayed University of Artificial Intelligence (MBZUAI)","institution_ids":["https://openalex.org/I4210113480"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129508489","display_name":"Abdul Ahad Butt","orcid":null},"institutions":[{"id":"https://openalex.org/I4210113480","display_name":"Mohamed bin Zayed University of Artificial Intelligence","ror":"https://ror.org/0258gkt32","country_code":"AE","type":"education","lineage":["https://openalex.org/I4210113480"]}],"countries":["AE"],"is_corresponding":false,"raw_author_name":"Abdul Ahad Butt","raw_affiliation_strings":["Mohamed Bin Zayed University of Artificial Intelligence (MBZUAI)"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Mohamed Bin Zayed University of Artificial Intelligence (MBZUAI)","institution_ids":["https://openalex.org/I4210113480"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024292507","display_name":"G\u00fcl Varol","orcid":"https://orcid.org/0000-0002-8438-6152"},"institutions":[{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"government","lineage":["https://openalex.org/I1294671590"]},{"id":"https://openalex.org/I4210154111","display_name":"Universit\u00e9 Gustave Eiffel","ror":"https://ror.org/03x42jk29","country_code":"FR","type":"education","lineage":["https://openalex.org/I4210154111"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"G\u00fcl Varol","raw_affiliation_strings":["LIGM, &#x00C9;cole des Ponts, IP Paris, Univ Gustave Eiffel, CNRS"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"LIGM, &#x00C9;cole des Ponts, IP Paris, Univ Gustave Eiffel, CNRS","institution_ids":["https://openalex.org/I1294671590","https://openalex.org/I4210154111"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5137167667","display_name":"Ivan Laptev","orcid":null},"institutions":[{"id":"https://openalex.org/I4210113480","display_name":"Mohamed bin Zayed University of Artificial Intelligence","ror":"https://ror.org/0258gkt32","country_code":"AE","type":"education","lineage":["https://openalex.org/I4210113480"]}],"countries":["AE"],"is_corresponding":false,"raw_author_name":"Ivan Laptev","raw_affiliation_strings":["Mohamed Bin Zayed University of Artificial Intelligence (MBZUAI)"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Mohamed Bin Zayed University of Artificial Intelligence (MBZUAI)","institution_ids":["https://openalex.org/I4210113480"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.85180533,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"750","last_page":"759"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.890500009059906,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.890500009059906,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.03929999843239784,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.025299999862909317,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.30649998784065247},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.2953000068664551},{"id":"https://openalex.org/keywords/the-internet","display_name":"The Internet","score":0.295199990272522},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.2784999907016754}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6392999887466431},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.3711000084877014},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.35280001163482666},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.30649998784065247},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.2953000068664551},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.295199990272522},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.2879999876022339},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.2784999907016754},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.267300009727478},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.26109999418258667}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/3dv69130.2026.00077","is_oa":false,"landing_page_url":"https://doi.org/10.1109/3dv69130.2026.00077","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 International Conference on 3D Vision (3DV)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":48,"referenced_works":["https://openalex.org/W1967554269","https://openalex.org/W2337252826","https://openalex.org/W2971856312","https://openalex.org/W2978956737","https://openalex.org/W3081268564","https://openalex.org/W3108262631","https://openalex.org/W3153832461","https://openalex.org/W3167478287","https://openalex.org/W4206861281","https://openalex.org/W4221045999","https://openalex.org/W4226289673","https://openalex.org/W4230429791","https://openalex.org/W4288079574","https://openalex.org/W4297981470","https://openalex.org/W4297989763","https://openalex.org/W4312242013","https://openalex.org/W4312457231","https://openalex.org/W4312635677","https://openalex.org/W4312783246","https://openalex.org/W4313072274","https://openalex.org/W4378084859","https://openalex.org/W4386065807","https://openalex.org/W4386076103","https://openalex.org/W4386076288","https://openalex.org/W4387963301","https://openalex.org/W4388284323","https://openalex.org/W4389339329","https://openalex.org/W4390871696","https://openalex.org/W4390872031","https://openalex.org/W4390874263","https://openalex.org/W4399563483","https://openalex.org/W4402704593","https://openalex.org/W4402715826","https://openalex.org/W4402754111","https://openalex.org/W4402772414","https://openalex.org/W4402772461","https://openalex.org/W4402916263","https://openalex.org/W4402951619","https://openalex.org/W4403842319","https://openalex.org/W4404526366","https://openalex.org/W4404966227","https://openalex.org/W4413144746","https://openalex.org/W4413145303","https://openalex.org/W4413146472","https://openalex.org/W4413146706","https://openalex.org/W4413146934","https://openalex.org/W4415799055","https://openalex.org/W7160218459"],"related_works":[],"abstract_inverted_index":{"Human":[0],"motion":[1,14,69,93,145],"generation":[2],"has":[3],"shown":[4],"great":[5],"advances":[6],"powered":[7],"by":[8],"recent":[9],"diffusion":[10],"models":[11],"and":[12,47,78,96,116,133,164],"large-scale":[13,61],"capture":[15,70],"data.":[16],"Most":[17],"of":[18,25,60,80,83,112,158],"existing":[19,68],"works,":[20],"however,":[21],"currently":[22],"target":[23],"animation":[24,157],"isolated":[26],"people":[27,77,159],"in":[28,36,44],"empty":[29],"scenes.":[30,165],"Meanwhile,":[31],"synthesizing":[32],"realistic":[33],"human-object":[34,127],"interactions":[35,56],"complex":[37],"3D":[38,113],"scenes":[39],"remains":[40],"a":[41],"critical":[42],"challenge":[43],"computer":[45],"graphics":[46],"robotics.":[48],"One":[49],"obstacle":[50],"towards":[51],"generating":[52],"versatile":[53],"high-fidelity":[54],"humanobject":[55],"is":[57,72],"the":[58],"lack":[59],"datasets":[62],"with":[63,126,161],"diverse":[64,162],"object":[65],"manipulations.":[66],"Indeed,":[67],"data":[71],"typically":[73],"restricted":[74],"to":[75,99,136,140],"single":[76],"manipulations":[79],"limited":[81],"sets":[82],"objects.":[84],"To":[85],"address":[86],"this":[87],"issue,":[88],"we":[89,150],"propose":[90],"an":[91,152],"automatic":[92],"extraction":[94],"pipeline":[95],"use":[97],"it":[98],"collect":[100],"interaction-rich":[101],"human":[102,114,144],"motions.":[103],"Our":[104],"new":[105],"dataset":[106],"InterPose":[107,135,149],"contains":[108],"73.8":[109],"K":[110,124],"sequences":[111],"motions":[115],"corresponding":[117],"text":[118],"captions":[119],"automatically":[120],"obtained":[121],"from":[122],"45.8":[123],"videos":[125],"interactions.":[128],"We":[129],"perform":[130],"extensive":[131],"experiments":[132],"demonstrate":[134],"bring":[137],"significant":[138],"improvements":[139],"state-of-the-art":[141],"methods":[142],"for":[143],"generation.":[146],"Moreover,":[147],"using":[148],"develop":[151],"LLM-based":[153],"agent":[154],"enabling":[155],"zero-shot":[156],"interacting":[160],"objects":[163]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-28T00:00:00"}
