{"id":"https://openalex.org/W4404361710","doi":"https://doi.org/10.48550/arxiv.2411.05005","title":"Diff-2-in-1: Bridging Generation and Dense Perception with Diffusion Models","display_name":"Diff-2-in-1: Bridging Generation and Dense Perception with Diffusion Models","publication_year":2024,"publication_date":"2024-11-07","ids":{"openalex":"https://openalex.org/W4404361710","doi":"https://doi.org/10.48550/arxiv.2411.05005"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2411.05005","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2411.05005","pdf_url":"https://arxiv.org/pdf/2411.05005","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2411.05005","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5075364910","display_name":"Shuhong Zheng","orcid":"https://orcid.org/0000-0003-3160-9532"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zheng, Shuhong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076213540","display_name":"Zhipeng Bao","orcid":"https://orcid.org/0009-0008-9898-3741"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bao, Zhipeng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007599604","display_name":"Ruoyu Zhao","orcid":"https://orcid.org/0000-0003-3631-1890"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Ruoyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075246991","display_name":"Martial Hebert","orcid":"https://orcid.org/0000-0003-4566-5930"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hebert, Martial","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5039231803","display_name":"Yu-Xiong Wang","orcid":"https://orcid.org/0000-0003-1195-2739"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Yu-Xiong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.6761999726295471,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.6761999726295471,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bridging","display_name":"Bridging (networking)","score":0.9495251178741455},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.5560156106948853},{"id":"https://openalex.org/keywords/diffusion","display_name":"Diffusion","score":0.469178706407547},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.34582293033599854},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.29110977053642273},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.1746339499950409},{"id":"https://openalex.org/keywords/neuroscience","display_name":"Neuroscience","score":0.11992388963699341},{"id":"https://openalex.org/keywords/thermodynamics","display_name":"Thermodynamics","score":0.05003654956817627}],"concepts":[{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.9495251178741455},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.5560156106948853},{"id":"https://openalex.org/C69357855","wikidata":"https://www.wikidata.org/wiki/Q163214","display_name":"Diffusion","level":2,"score":0.469178706407547},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.34582293033599854},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.29110977053642273},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.1746339499950409},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.11992388963699341},{"id":"https://openalex.org/C97355855","wikidata":"https://www.wikidata.org/wiki/Q11473","display_name":"Thermodynamics","level":1,"score":0.05003654956817627},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2411.05005","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2411.05005","pdf_url":"https://arxiv.org/pdf/2411.05005","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2411.05005","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2411.05005","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2411.05005","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2411.05005","pdf_url":"https://arxiv.org/pdf/2411.05005","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W4388870064","https://openalex.org/W2210139803","https://openalex.org/W4235186151","https://openalex.org/W2054685365","https://openalex.org/W2056057048","https://openalex.org/W2667588871","https://openalex.org/W2272354214","https://openalex.org/W2084768720"],"abstract_inverted_index":{"Beyond":[0],"high-fidelity":[1],"image":[2],"synthesis,":[3],"diffusion":[4,21],"models":[5,22],"have":[6],"recently":[7],"exhibited":[8],"promising":[9],"results":[10],"in":[11],"dense":[12,69],"visual":[13,70,87],"perception":[14,28,88],"tasks.":[15],"However,":[16],"most":[17],"existing":[18],"work":[19],"treats":[20],"as":[23,39],"a":[24,54,73,124],"standalone":[25],"component":[26],"for":[27,34],"tasks,":[29],"employing":[30],"them":[31],"either":[32],"solely":[33],"off-the-shelf":[35],"data":[36,66,100,121,149],"augmentation":[37],"or":[38],"mere":[40],"feature":[41],"extractors.":[42],"In":[43],"contrast":[44],"to":[45,97],"these":[46],"isolated":[47],"and":[48,68,119,146,155],"thus":[49],"sub-optimal":[50],"efforts,":[51],"we":[52,83],"introduce":[53],"unified,":[55],"versatile,":[56],"diffusion-based":[57],"framework,":[58,82,137],"Diff-2-in-1,":[59],"that":[60,101],"can":[61],"simultaneously":[62],"handle":[63],"both":[64,153],"multi-modal":[65,90,99,148],"generation":[67,150],"perception,":[71],"through":[72],"unique":[74],"exploitation":[75],"of":[76,105,115,135],"the":[77,94,103,106,113,116,133],"diffusion-denoising":[78],"process.":[79],"Within":[80],"this":[81],"further":[84],"enhance":[85],"discriminative":[86,144],"via":[89],"generation,":[91],"by":[92,122,152],"utilizing":[93],"denoising":[95],"network":[96],"create":[98],"mirror":[102],"distribution":[104],"original":[107],"training":[108],"set.":[109],"Importantly,":[110],"Diff-2-in-1":[111],"optimizes":[112],"utilization":[114],"created":[117],"diverse":[118],"faithful":[120],"leveraging":[123],"novel":[125],"self-improving":[126],"learning":[127],"mechanism.":[128],"Comprehensive":[129],"experimental":[130],"evaluations":[131],"validate":[132],"effectiveness":[134],"our":[136],"showcasing":[138],"consistent":[139],"performance":[140],"improvements":[141],"across":[142],"various":[143],"backbones":[145],"high-quality":[147],"characterized":[151],"realism":[154],"usefulness.":[156]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2024-11-15T00:00:00"}
