{"id":"https://openalex.org/W4414263695","doi":"https://doi.org/10.1109/tpami.2025.3609962","title":"Object Detection Data Synthesis via Box-to-Image Generation Based on Diffusion Models","display_name":"Object Detection Data Synthesis via Box-to-Image Generation Based on Diffusion Models","publication_year":2025,"publication_date":"2025-09-15","ids":{"openalex":"https://openalex.org/W4414263695","doi":"https://doi.org/10.1109/tpami.2025.3609962","pmid":"https://pubmed.ncbi.nlm.nih.gov/40953433"},"language":"en","primary_location":{"id":"doi:10.1109/tpami.2025.3609962","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2025.3609962","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Jingyuan Zhu","orcid":"https://orcid.org/0000-0003-3995-8382"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jingyuan Zhu","raw_affiliation_strings":["Department of Electronic Engineering, Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0003-3995-8382","affiliations":[{"raw_affiliation_string":"Department of Electronic Engineering, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006236325","display_name":"Huimin Ma","orcid":"https://orcid.org/0000-0001-5383-5667"},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huimin Ma","raw_affiliation_strings":["School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-5383-5667","affiliations":[{"raw_affiliation_string":"School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, China","institution_ids":["https://openalex.org/I92403157"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Jiansheng Chen","orcid":"https://orcid.org/0000-0002-2040-7938"},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiansheng Chen","raw_affiliation_strings":["School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-2040-7938","affiliations":[{"raw_affiliation_string":"School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, China","institution_ids":["https://openalex.org/I92403157"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101154546","display_name":"Jian Yuan","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jian Yuan","raw_affiliation_strings":["Department of Electronic Engineering, Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-9734-6056","affiliations":[{"raw_affiliation_string":"Department of Electronic Engineering, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":1.1332,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.82542604,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":"48","issue":"1","first_page":"557","last_page":"571"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9592000246047974,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9592000246047974,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.902999997138977,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.7346000075340271},{"id":"https://openalex.org/keywords/object-detection","display_name":"Object detection","score":0.703499972820282},{"id":"https://openalex.org/keywords/minimum-bounding-box","display_name":"Minimum bounding box","score":0.6137999892234802},{"id":"https://openalex.org/keywords/synthetic-data","display_name":"Synthetic data","score":0.4636000096797943},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.450300008058548},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.44209998846054077},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4291999936103821},{"id":"https://openalex.org/keywords/controllability","display_name":"Controllability","score":0.4287000000476837},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.3831000030040741}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7943999767303467},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.7346000075340271},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7055000066757202},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.703499972820282},{"id":"https://openalex.org/C147037132","wikidata":"https://www.wikidata.org/wiki/Q6865426","display_name":"Minimum bounding box","level":3,"score":0.6137999892234802},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.603600025177002},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.4636000096797943},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.450300008058548},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.44209998846054077},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4291999936103821},{"id":"https://openalex.org/C48209547","wikidata":"https://www.wikidata.org/wiki/Q1331104","display_name":"Controllability","level":2,"score":0.4287000000476837},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.3831000030040741},{"id":"https://openalex.org/C63584917","wikidata":"https://www.wikidata.org/wiki/Q333286","display_name":"Bounding overwatch","level":2,"score":0.3596999943256378},{"id":"https://openalex.org/C182521987","wikidata":"https://www.wikidata.org/wiki/Q2493877","display_name":"Viola\u2013Jones object detection framework","level":5,"score":0.349700003862381},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.34299999475479126},{"id":"https://openalex.org/C94915269","wikidata":"https://www.wikidata.org/wiki/Q1834857","display_name":"Detector","level":2,"score":0.3294000029563904},{"id":"https://openalex.org/C20894473","wikidata":"https://www.wikidata.org/wiki/Q1116105","display_name":"Object model","level":3,"score":0.32899999618530273},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.3190000057220459},{"id":"https://openalex.org/C202474056","wikidata":"https://www.wikidata.org/wiki/Q1931635","display_name":"Video tracking","level":3,"score":0.3057999908924103},{"id":"https://openalex.org/C64876066","wikidata":"https://www.wikidata.org/wiki/Q5141226","display_name":"Cognitive neuroscience of visual object recognition","level":3,"score":0.30309998989105225},{"id":"https://openalex.org/C50494287","wikidata":"https://www.wikidata.org/wiki/Q658467","display_name":"Texture synthesis","level":5,"score":0.2870999872684479},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.26499998569488525},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.26420000195503235},{"id":"https://openalex.org/C141379421","wikidata":"https://www.wikidata.org/wiki/Q6094427","display_name":"Iterative reconstruction","level":2,"score":0.2621000111103058},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.260699987411499}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tpami.2025.3609962","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2025.3609962","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},{"id":"pmid:40953433","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/40953433","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on pattern analysis and machine intelligence","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G5083284488","display_name":null,"funder_award_id":"2022ZD0117902","funder_id":"https://openalex.org/F4320329860","funder_display_name":"National Science and Technology Major Project"},{"id":"https://openalex.org/G5631758027","display_name":null,"funder_award_id":"62227801","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320329860","display_name":"National Science and Technology Major Project","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":42,"referenced_works":["https://openalex.org/W1861492603","https://openalex.org/W2037227137","https://openalex.org/W2108598243","https://openalex.org/W2194775991","https://openalex.org/W2507296351","https://openalex.org/W2737258237","https://openalex.org/W2883820570","https://openalex.org/W2962770929","https://openalex.org/W2983943451","https://openalex.org/W3035564946","https://openalex.org/W3035574168","https://openalex.org/W3035574324","https://openalex.org/W3167788848","https://openalex.org/W3174807077","https://openalex.org/W3176659256","https://openalex.org/W3203593958","https://openalex.org/W4288083516","https://openalex.org/W4307900253","https://openalex.org/W4312933868","https://openalex.org/W4362641980","https://openalex.org/W4385245566","https://openalex.org/W4386066731","https://openalex.org/W4386072096","https://openalex.org/W4386075819","https://openalex.org/W4386076027","https://openalex.org/W4386076325","https://openalex.org/W4386076403","https://openalex.org/W4390871935","https://openalex.org/W4390872387","https://openalex.org/W4390872876","https://openalex.org/W4390873054","https://openalex.org/W4390873442","https://openalex.org/W4394597085","https://openalex.org/W4402623742","https://openalex.org/W4402716206","https://openalex.org/W4402727668","https://openalex.org/W4402754201","https://openalex.org/W4403049104","https://openalex.org/W4404612908","https://openalex.org/W4412468250","https://openalex.org/W4415797382","https://openalex.org/W6950310954"],"related_works":[],"abstract_inverted_index":{"Modern":[0],"diffusion-based":[1],"image":[2],"generative":[3,156],"models":[4],"have":[5],"made":[6],"significant":[7],"progress":[8],"and":[9,25,34,81,103,114,151,177],"become":[10],"promising":[11],"to":[12,48,84,91,124,131,143,167,182,200],"enrich":[13],"training":[14,136],"data":[15,58,137,171],"for":[16,28,60],"the":[17,22,26,93,169],"object":[18,61,66,147,197,213],"detection":[19,67,198],"task.":[20],"However,":[21],"generation":[23],"quality":[24],"controllability":[27],"complex":[29,112],"scenes":[30,113],"containing":[31],"multi-class":[32],"objects":[33,36,80],"dense":[35],"with":[37,100,146],"occlusions":[38],"remain":[39],"limited.":[40],"This":[41],"paper":[42],"presents":[43],"ODGEN,":[44],"a":[45,64,72,120,193,202],"novel":[46],"method":[47],"generate":[49],"high-quality":[50],"images":[51,83],"conditioned":[52],"on":[53,76,127,165],"bounding":[54],"boxes,":[55],"thereby":[56],"facilitating":[57],"synthesis":[59,122],"detection.":[62],"Given":[63],"domain-specific":[65,129],"dataset,":[68],"we":[69,89,118,191],"first":[70],"fine-tune":[71],"pre-trained":[73],"diffusion":[74,94],"model":[75,95,204],"both":[77],"cropped":[78],"foreground":[79],"entire":[82],"fit":[85],"target":[86],"distributions.":[87],"Then":[88],"propose":[90],"control":[92],"using":[96],"synthesized":[97],"visual":[98],"prompts":[99],"spatial":[101],"constraints":[102],"object-wise":[104],"textual":[105],"descriptions.":[106],"ODGEN":[107,126,140,173],"exhibits":[108],"robustness":[109],"in":[110,174,184,215],"handling":[111],"specific":[115],"domains.":[116],"Further,":[117],"design":[119,160],"dataset":[121],"pipeline":[123],"evaluate":[125],"7":[128],"benchmarks":[130],"demonstrate":[132],"its":[133],"effectiveness.":[134],"Adding":[135],"generated":[138],"by":[139],"improves":[141],"up":[142,181],"25.3%":[144],"mAP@.50:.95":[145,185],"detectors":[148],"like":[149],"YOLOv5":[150],"YOLOv7,":[152],"outperforming":[153],"prior":[154],"controllable":[155],"methods.":[157,188],"We":[158],"also":[159],"an":[161,179],"evaluation":[162],"protocol":[163],"based":[164],"COCO-2014":[166],"validate":[168],"synthetic":[170],"of":[172,195,212],"general":[175,203],"domains":[176],"observe":[178],"advantage":[180],"5.6%":[183],"against":[186],"existing":[187],"In":[189],"addition,":[190],"employ":[192],"series":[194],"large-scale":[196],"datasets":[199],"train":[201],"named":[205],"Stable":[206],"Box":[207],"Diffusion,":[208],"which":[209],"covers":[210],"thousands":[211],"categories":[214],"most":[216],"common":[217],"scenes.":[218]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2025-12-04T23:47:47.292601","created_date":"2025-10-10T00:00:00"}
