{"id":"https://openalex.org/W4417073296","doi":"https://doi.org/10.1145/3743093.3771078","title":"Prefix-Guided Adaptation of Pretrained Segmenter for RGB-D Indoor Panoptic Segmentation","display_name":"Prefix-Guided Adaptation of Pretrained Segmenter for RGB-D Indoor Panoptic Segmentation","publication_year":2025,"publication_date":"2025-12-06","ids":{"openalex":"https://openalex.org/W4417073296","doi":"https://doi.org/10.1145/3743093.3771078"},"language":null,"primary_location":{"id":"doi:10.1145/3743093.3771078","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3743093.3771078","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 7th ACM International Conference on Multimedia in Asia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Yibin Wang","orcid":"https://orcid.org/0009-0002-7693-3029"},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yibin Wang","raw_affiliation_strings":["Tongji University, Shanghai, China"],"raw_orcid":"https://orcid.org/0009-0002-7693-3029","affiliations":[{"raw_affiliation_string":"Tongji University, Shanghai, China","institution_ids":["https://openalex.org/I116953780"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Yuanyang Wang","orcid":"https://orcid.org/0009-0009-7271-6327"},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuanyang Wang","raw_affiliation_strings":["Tongji University, Shanghai, China"],"raw_orcid":"https://orcid.org/0009-0009-7271-6327","affiliations":[{"raw_affiliation_string":"Tongji University, Shanghai, China","institution_ids":["https://openalex.org/I116953780"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120692689","display_name":"Tuowei Qu","orcid":"https://orcid.org/0009-0000-8358-1821"},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tuowei Qu","raw_affiliation_strings":["Tongji University, Shanghai, China"],"raw_orcid":"https://orcid.org/0009-0000-8358-1821","affiliations":[{"raw_affiliation_string":"Tongji University, Shanghai, China","institution_ids":["https://openalex.org/I116953780"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Jiaxuan Zhu","orcid":"https://orcid.org/0009-0009-5451-3120"},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiaxuan Zhu","raw_affiliation_strings":["Tongji University, Shanghai, China"],"raw_orcid":"https://orcid.org/0009-0009-5451-3120","affiliations":[{"raw_affiliation_string":"Tongji University, Shanghai, China","institution_ids":["https://openalex.org/I116953780"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5091831237","display_name":"Yu Fang","orcid":"https://orcid.org/0000-0003-1176-3225"},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yu Fang","raw_affiliation_strings":["Tongji University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0003-1176-3225","affiliations":[{"raw_affiliation_string":"Tongji University, Shanghai, China","institution_ids":["https://openalex.org/I116953780"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I116953780"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.38061188,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.870199978351593,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.870199978351593,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.019500000402331352,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11164","display_name":"Remote Sensing and LiDAR Applications","score":0.017400000244379044,"subfield":{"id":"https://openalex.org/subfields/2305","display_name":"Environmental Engineering"},"field":{"id":"https://openalex.org/fields/23","display_name":"Environmental Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.7419000267982483},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.47350001335144043},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.4359999895095825},{"id":"https://openalex.org/keywords/panopticon","display_name":"Panopticon","score":0.4122999906539917},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.4099999964237213},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.40230000019073486},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.39750000834465027},{"id":"https://openalex.org/keywords/convolution","display_name":"Convolution (computer science)","score":0.38530001044273376},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.382099986076355}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8008999824523926},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.7419000267982483},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6881999969482422},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4823000133037567},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.47350001335144043},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.4359999895095825},{"id":"https://openalex.org/C138569888","wikidata":"https://www.wikidata.org/wiki/Q828310","display_name":"Panopticon","level":3,"score":0.4122999906539917},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.4099999964237213},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.40230000019073486},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.39750000834465027},{"id":"https://openalex.org/C45347329","wikidata":"https://www.wikidata.org/wiki/Q5166604","display_name":"Convolution (computer science)","level":3,"score":0.38530001044273376},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.382099986076355},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.37959998846054077},{"id":"https://openalex.org/C82990744","wikidata":"https://www.wikidata.org/wiki/Q166194","display_name":"RGB color model","level":2,"score":0.3659000098705292},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.3547999858856201},{"id":"https://openalex.org/C110384440","wikidata":"https://www.wikidata.org/wiki/Q1143270","display_name":"Upsampling","level":3,"score":0.3447999954223633},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.3377000093460083},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.33489999175071716},{"id":"https://openalex.org/C142575187","wikidata":"https://www.wikidata.org/wiki/Q3358290","display_name":"Pyramid (geometry)","level":2,"score":0.3319000005722046},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.27160000801086426},{"id":"https://openalex.org/C65885262","wikidata":"https://www.wikidata.org/wiki/Q7429708","display_name":"Scale-space segmentation","level":4,"score":0.2700999975204468},{"id":"https://openalex.org/C67795661","wikidata":"https://www.wikidata.org/wiki/Q17018993","display_name":"Shearlet","level":3,"score":0.2667999863624573},{"id":"https://openalex.org/C94124525","wikidata":"https://www.wikidata.org/wiki/Q912550","display_name":"Categorization","level":2,"score":0.2632000148296356},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.2531000077724457},{"id":"https://openalex.org/C2776434776","wikidata":"https://www.wikidata.org/wiki/Q19246213","display_name":"Domain adaptation","level":3,"score":0.2508000135421753}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3743093.3771078","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3743093.3771078","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 7th ACM International Conference on Multimedia in Asia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W125693051","https://openalex.org/W1923184257","https://openalex.org/W2963837081","https://openalex.org/W2999219213","https://openalex.org/W3099793224","https://openalex.org/W3174770825","https://openalex.org/W4230575152","https://openalex.org/W4286982960","https://openalex.org/W4312263794","https://openalex.org/W4312610896","https://openalex.org/W4312815172","https://openalex.org/W4313142416","https://openalex.org/W4383108875","https://openalex.org/W4385488625","https://openalex.org/W4386066126","https://openalex.org/W4386076253","https://openalex.org/W4386179772","https://openalex.org/W4386790226","https://openalex.org/W4386815463","https://openalex.org/W4389667339","https://openalex.org/W4390190100","https://openalex.org/W4390492478","https://openalex.org/W4391465821","https://openalex.org/W4394597798","https://openalex.org/W4405072733","https://openalex.org/W4405785392"],"related_works":[],"abstract_inverted_index":{"RGB-D":[0,49,71],"indoor":[1],"panoptic":[2,97],"segmentation":[3,45,98],"has":[4],"seen":[5],"limited":[6],"progress,":[7],"largely":[8],"due":[9],"to":[10,47,117],"the":[11,19,48,90,118],"scarcity":[12],"of":[13,22,82],"large-scale,":[14],"densely":[15],"annotated":[16],"datasets":[17],"and":[18,74,84,128],"poor":[20],"generalization":[21,116],"existing":[23],"methods,":[24],"which":[25],"often":[26],"require":[27],"dataset-specific":[28],"retraining.":[29],"To":[30],"address":[31],"these":[32],"challenges,":[33],"we":[34,59],"propose":[35],"a":[36,53],"generalizable":[37],"fine-tuning":[38],"framework":[39],"that":[40],"efficiently":[41],"adapts":[42],"pretrained":[43],"RGB":[44],"model":[46],"domain":[50],"using":[51],"only":[52],"small,":[54],"high-quality":[55],"dataset":[56],"(NYUv2).":[57],"Specifically,":[58],"introduce":[60],"two":[61],"novel":[62],"modules:":[63],"Mutual":[64],"Differential":[65],"Convolution":[66],"Attention":[67],"(MDCA)":[68],"for":[69,79,126],"geometry-aware":[70],"feature":[72],"fusion,":[73],"Prefix-Guided":[75],"Adapter":[76],"Ensemble":[77],"(PGAE)":[78],"efficient":[80],"modulation":[81],"encoder":[83],"decoder":[85],"attention":[86],"pathways":[87],"without":[88],"modifying":[89],"original":[91],"architecture.":[92],"Our":[93],"approach":[94],"achieves":[95],"state-of-the-art":[96],"performance":[99],"on":[100],"NYUv2,":[101],"surpassing":[102],"prior":[103],"methods":[104],"by":[105],"4.46%":[106],"in":[107],"Panoptic":[108],"Quality":[109],"(PQ).":[110],"Moreover,":[111],"it":[112],"demonstrates":[113],"strong":[114],"zero-shot":[115],"significantly":[119],"larger":[120],"SUNRGB-D":[121],"dataset,":[122],"highlighting":[123],"its":[124],"effectiveness":[125],"scalable":[127],"transferable":[129],"multi-modal":[130],"scene":[131],"understanding.":[132]},"counts_by_year":[],"updated_date":"2025-12-06T23:14:57.273132","created_date":"2025-12-06T00:00:00"}
