{"id":"https://openalex.org/W4399759491","doi":"https://doi.org/10.48550/arxiv.2406.10197","title":"Composing Parts for Expressive Object Generation","display_name":"Composing Parts for Expressive Object Generation","publication_year":2024,"publication_date":"2024-06-14","ids":{"openalex":"https://openalex.org/W4399759491","doi":"https://doi.org/10.48550/arxiv.2406.10197"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2406.10197","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2406.10197","pdf_url":"https://arxiv.org/pdf/2406.10197","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2406.10197","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5061892438","display_name":"Harsh Rangwani","orcid":"https://orcid.org/0000-0001-7945-7641"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Rangwani, Harsh","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101908796","display_name":"Aishwarya Agarwal","orcid":"https://orcid.org/0000-0002-5368-2442"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Agarwal, Aishwarya","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077902428","display_name":"Kuldeep Kulkarni","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kulkarni, Kuldeep","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104083191","display_name":"R. Venkatesh Babu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Babu, R. Venkatesh","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5044410531","display_name":"Srikrishna Karanam","orcid":"https://orcid.org/0000-0002-7627-7765"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Karanam, Srikrishna","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5061892438"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.9128999710083008,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.9128999710083008,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14339","display_name":"Image Processing and 3D Reconstruction","score":0.9104999899864197,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/composition","display_name":"Composition (language)","score":0.6879541873931885},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.5668466687202454},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5476996302604675},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.3586074709892273},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.33419209718704224},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2392078936100006},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.21505165100097656},{"id":"https://openalex.org/keywords/philosophy","display_name":"Philosophy","score":0.08102115988731384}],"concepts":[{"id":"https://openalex.org/C40231798","wikidata":"https://www.wikidata.org/wiki/Q1333743","display_name":"Composition (language)","level":2,"score":0.6879541873931885},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.5668466687202454},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5476996302604675},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.3586074709892273},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.33419209718704224},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2392078936100006},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.21505165100097656},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.08102115988731384}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2406.10197","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2406.10197","pdf_url":"https://arxiv.org/pdf/2406.10197","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2406.10197","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2406.10197","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2406.10197","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2406.10197","pdf_url":"https://arxiv.org/pdf/2406.10197","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4399759491.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052","https://openalex.org/W2382290278","https://openalex.org/W4395014643"],"abstract_inverted_index":{"Image":[0],"composition":[1],"and":[2,103,159,200],"generation":[3,22,83],"are":[4,41,171],"processes":[5],"where":[6],"the":[7,15,19,33,44,58,62,67,93,120,137,164,188],"artists":[8,102],"need":[9],"control":[10,100,192],"over":[11],"various":[12],"parts":[13,117],"of":[14,169,190],"generated":[16,60],"images.":[17],"However,":[18],"current":[20],"state-of-the-art":[21],"models,":[23],"like":[24],"Stable":[25],"Diffusion,":[26],"cannot":[27],"handle":[28],"fine-grained":[29,86,156],"part-level":[30,87,191],"attributes":[31,88,158],"in":[32,92,150],"text":[34,46,95],"prompts.":[35],"Specifically,":[36],"when":[37],"additional":[38],"attribute":[39,68],"details":[40],"added":[42],"to":[43,133,136,162,182],"base":[45,63,94],"prompt,":[47],"these":[48,72],"text-to-image":[49],"models":[50],"either":[51],"generate":[52],"an":[53],"image":[54,59,82],"vastly":[55],"different":[56],"from":[57,61,123],"prompt":[64],"or":[65],"ignore":[66],"details.":[69],"To":[70],"mitigate":[71],"issues,":[73],"we":[74,144],"introduce":[75],"PartComposer,":[76],"a":[77,124,146,175],"training-free":[78],"method":[79],"that":[80],"enables":[81,104,129,180],"based":[84,154,172],"on":[85,155,173],"specified":[89],"for":[90,101],"objects":[91],"prompt.":[96],"This":[97,128],"allows":[98],"more":[99],"novel":[105],"object":[106,111,116,121],"compositions":[107],"by":[108,118,194],"combining":[109],"distinctive":[110],"parts.":[112],"PartComposer":[113,170,195],"first":[114],"localizes":[115],"denoising":[119],"region":[122,153],"specific":[125],"diffusion":[126,148,177],"process.":[127],"each":[130,151],"part":[131,142,152,157],"token":[132],"be":[134],"localized":[135,147],"right":[138],"region.":[139],"After":[140],"obtaining":[141],"masks,":[143],"run":[145],"process":[149],"combine":[160],"them":[161],"produce":[163],"final":[165],"image.":[166],"All":[167],"stages":[168],"repurposing":[174],"pre-trained":[176],"model,":[178],"which":[179],"it":[181],"generalize":[183],"across":[184],"domains.":[185],"We":[186],"demonstrate":[187],"effectiveness":[189],"provided":[193],"through":[196],"qualitative":[197],"visual":[198],"examples":[199],"quantitative":[201],"comparisons":[202],"with":[203],"contemporary":[204],"baselines.":[205]},"counts_by_year":[],"updated_date":"2026-03-11T14:59:36.786465","created_date":"2024-06-18T00:00:00"}
