{"id":"https://openalex.org/W4402618831","doi":"https://doi.org/10.1145/3664647.3680936","title":"ReCorD: Reasoning and Correcting Diffusion for HOI Generation","display_name":"ReCorD: Reasoning and Correcting Diffusion for HOI Generation","publication_year":2024,"publication_date":"2024-10-26","ids":{"openalex":"https://openalex.org/W4402618831","doi":"https://doi.org/10.1145/3664647.3680936"},"language":"en","primary_location":{"id":"doi:10.1145/3664647.3680936","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3664647.3680936","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2407.17911","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5016296062","display_name":"Jian-Yu Jiang-Lin","orcid":null},"institutions":[{"id":"https://openalex.org/I16733864","display_name":"National Taiwan University","ror":"https://ror.org/05bqach95","country_code":"TW","type":"education","lineage":["https://openalex.org/I16733864"]}],"countries":["TW"],"is_corresponding":true,"raw_author_name":"Jian-Yu Jiang-Lin","raw_affiliation_strings":["Natl. Taiwan Univ., Taipei, Taiwan"],"affiliations":[{"raw_affiliation_string":"Natl. Taiwan Univ., Taipei, Taiwan","institution_ids":["https://openalex.org/I16733864"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032931636","display_name":"Kang-Yang Huang","orcid":"https://orcid.org/0000-0003-1268-5214"},"institutions":[{"id":"https://openalex.org/I16733864","display_name":"National Taiwan University","ror":"https://ror.org/05bqach95","country_code":"TW","type":"education","lineage":["https://openalex.org/I16733864"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Kang-Yang Huang","raw_affiliation_strings":["Natl. Taiwan Univ., Taipei, Taiwan"],"affiliations":[{"raw_affiliation_string":"Natl. Taiwan Univ., Taipei, Taiwan","institution_ids":["https://openalex.org/I16733864"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042705745","display_name":"Ling Lo","orcid":"https://orcid.org/0000-0002-9471-8528"},"institutions":[{"id":"https://openalex.org/I148366613","display_name":"National Yang Ming Chiao Tung University","ror":"https://ror.org/00se2k293","country_code":"TW","type":"education","lineage":["https://openalex.org/I148366613"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Ling Lo","raw_affiliation_strings":["Natl. Yang Ming Chiao Tung Univ., Hsinchu, Taiwan"],"affiliations":[{"raw_affiliation_string":"Natl. Yang Ming Chiao Tung Univ., Hsinchu, Taiwan","institution_ids":["https://openalex.org/I148366613"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102083131","display_name":"Yining Huang","orcid":"https://orcid.org/0000-0002-9634-9156"},"institutions":[{"id":"https://openalex.org/I148366613","display_name":"National Yang Ming Chiao Tung University","ror":"https://ror.org/00se2k293","country_code":"TW","type":"education","lineage":["https://openalex.org/I148366613"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Yi-Ning Huang","raw_affiliation_strings":["Natl. Yang Ming Chiao Tung Univ., Hsinchu, Taiwan"],"affiliations":[{"raw_affiliation_string":"Natl. Yang Ming Chiao Tung Univ., Hsinchu, Taiwan","institution_ids":["https://openalex.org/I148366613"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114236418","display_name":"Terence Lin","orcid":null},"institutions":[{"id":"https://openalex.org/I148366613","display_name":"National Yang Ming Chiao Tung University","ror":"https://ror.org/00se2k293","country_code":"TW","type":"education","lineage":["https://openalex.org/I148366613"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Terence Lin","raw_affiliation_strings":["Natl. Yang Ming Chiao Tung Univ., Hsinchu, Taiwan"],"affiliations":[{"raw_affiliation_string":"Natl. Yang Ming Chiao Tung Univ., Hsinchu, Taiwan","institution_ids":["https://openalex.org/I148366613"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071442349","display_name":"Jhih-Ciang Wu","orcid":"https://orcid.org/0000-0003-4071-3980"},"institutions":[{"id":"https://openalex.org/I16733864","display_name":"National Taiwan University","ror":"https://ror.org/05bqach95","country_code":"TW","type":"education","lineage":["https://openalex.org/I16733864"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Jhih-Ciang Wu","raw_affiliation_strings":["Natl. Taiwan Univ., Taipei, Taiwan"],"affiliations":[{"raw_affiliation_string":"Natl. Taiwan Univ., Taipei, Taiwan","institution_ids":["https://openalex.org/I16733864"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040050806","display_name":"Hong-Han Shuai","orcid":"https://orcid.org/0000-0003-2216-077X"},"institutions":[{"id":"https://openalex.org/I148366613","display_name":"National Yang Ming Chiao Tung University","ror":"https://ror.org/00se2k293","country_code":"TW","type":"education","lineage":["https://openalex.org/I148366613"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Hong-Han Shuai","raw_affiliation_strings":["Natl. Yang Ming Chiao Tung Univ., Hsinchu, Taiwan"],"affiliations":[{"raw_affiliation_string":"Natl. Yang Ming Chiao Tung Univ., Hsinchu, Taiwan","institution_ids":["https://openalex.org/I148366613"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5000780442","display_name":"Wen-Huang Cheng","orcid":"https://orcid.org/0000-0002-4662-7875"},"institutions":[{"id":"https://openalex.org/I16733864","display_name":"National Taiwan University","ror":"https://ror.org/05bqach95","country_code":"TW","type":"education","lineage":["https://openalex.org/I16733864"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Wen-Huang Cheng","raw_affiliation_strings":["Natl. Taiwan Univ., Taipei, Taiwan"],"affiliations":[{"raw_affiliation_string":"Natl. Taiwan Univ., Taipei, Taiwan","institution_ids":["https://openalex.org/I16733864"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5016296062"],"corresponding_institution_ids":["https://openalex.org/I16733864"],"apc_list":null,"apc_paid":null,"fwci":0.9971,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.77288517,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"9465","last_page":"9474"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.995199978351593,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8343183994293213},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.658559262752533},{"id":"https://openalex.org/keywords/fidelity","display_name":"Fidelity","score":0.6367493867874146},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5748339295387268},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5553304553031921},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.550719678401947},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.49862146377563477},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.4104541838169098},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3911629021167755},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.34635496139526367},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.303585022687912}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8343183994293213},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.658559262752533},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.6367493867874146},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5748339295387268},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5553304553031921},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.550719678401947},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.49862146377563477},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.4104541838169098},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3911629021167755},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.34635496139526367},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.303585022687912},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3664647.3680936","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3664647.3680936","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2407.17911","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2407.17911","pdf_url":"https://arxiv.org/pdf/2407.17911","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2407.17911","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2407.17911","pdf_url":"https://arxiv.org/pdf/2407.17911","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.8299999833106995,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":23,"referenced_works":["https://openalex.org/W2133059825","https://openalex.org/W2964225075","https://openalex.org/W4304080673","https://openalex.org/W4304098884","https://openalex.org/W4310280916","https://openalex.org/W4312770707","https://openalex.org/W4312933868","https://openalex.org/W4378446358","https://openalex.org/W4385270985","https://openalex.org/W4385567149","https://openalex.org/W4385890045","https://openalex.org/W4386075631","https://openalex.org/W4386076027","https://openalex.org/W4386076215","https://openalex.org/W4387596111","https://openalex.org/W4387969545","https://openalex.org/W4388187930","https://openalex.org/W4390872387","https://openalex.org/W4390872671","https://openalex.org/W4390873054","https://openalex.org/W4390873732","https://openalex.org/W4402727884","https://openalex.org/W4402753507"],"related_works":["https://openalex.org/W4365211920","https://openalex.org/W3014948380","https://openalex.org/W4380551139","https://openalex.org/W4317695495","https://openalex.org/W4287117424","https://openalex.org/W4387506531","https://openalex.org/W2087346071","https://openalex.org/W2967848559","https://openalex.org/W4299831724","https://openalex.org/W4283803360"],"abstract_inverted_index":{"Diffusion":[0,46,56],"models":[1],"revolutionize":[2],"image":[3,95],"generation":[4,65,100,139],"by":[5,149],"leveraging":[6],"natural":[7],"language":[8],"to":[9,48,62,78,91,131,144],"guide":[10],"the":[11,64,80,83,93,133],"creation":[12],"of":[13,70,82,106],"multimedia":[14],"content.":[15],"Despite":[16],"significant":[17,134],"advancements":[18],"in":[19,25,116,136,153],"such":[20],"generative":[21],"models,":[22],"challenges":[23],"persist":[24],"depicting":[26],"detailed":[27],"human-object":[28],"interactions,":[29],"especially":[30],"regarding":[31],"pose":[32,107],"and":[33,44,109,161],"object":[34,110],"placement":[35],"accuracy.":[36],"We":[37,72,124],"introduce":[38],"a":[39,103],"training-free":[40],"method":[41],"named":[42],"Reasoning":[43],"Correcting":[45],"(ReCorD)":[47],"address":[49],"these":[50],"challenges.":[51],"Our":[52],"model":[53],"couples":[54],"Latent":[55],"Models":[57,61],"with":[58,86],"Visual":[59],"Language":[60],"refine":[63,92],"process,":[66],"ensuring":[67],"precise":[68,98],"depictions":[69],"HOIs.":[71],"propose":[73],"an":[74,87],"interaction-aware":[75],"reasoning":[76],"module":[77,90],"improve":[79],"interpretation":[81],"interaction,":[84],"along":[85],"interaction":[88],"correcting":[89],"output":[94],"for":[96],"more":[97],"HOI":[99,154],"delicately.":[101],"Through":[102],"meticulous":[104],"process":[105],"selection":[108],"positioning,":[111],"ReCorD":[112],"achieves":[113],"superior":[114],"fidelity":[115],"generated":[117],"images":[118],"while":[119],"efficiently":[120],"reducing":[121],"computational":[122],"requirements.":[123],"conduct":[125],"comprehensive":[126],"experiments":[127],"on":[128],"three":[129],"benchmarks":[130],"demonstrate":[132],"progress":[135],"solving":[137],"text-to-image":[138],"tasks,":[140],"showcasing":[141],"ReCorD's":[142],"ability":[143],"render":[145],"complex":[146],"interactions":[147],"accurately":[148],"outperforming":[150],"existing":[151],"methods":[152],"classification":[155],"score,":[156],"as":[157,159],"well":[158],"FID":[160],"Verb":[162],"CLIP-Score.":[163],"Project":[164],"website":[165],"is":[166],"available":[167],"at":[168],"https://alberthkyhky.github.io/ReCorD/":[169],".":[170]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
