{"id":"https://openalex.org/W7138840396","doi":"https://doi.org/10.48550/arxiv.2603.17520","title":"PCA-Seg: Revisiting Cost Aggregation for Open-Vocabulary Semantic and Part Segmentation","display_name":"PCA-Seg: Revisiting Cost Aggregation for Open-Vocabulary Semantic and Part Segmentation","publication_year":2026,"publication_date":"2026-03-18","ids":{"openalex":"https://openalex.org/W7138840396","doi":"https://doi.org/10.48550/arxiv.2603.17520"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.17520","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.17520","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.17520","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101684340","display_name":"Jianjian Yin","orcid":"https://orcid.org/0000-0002-0445-454X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yin, Jianjian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129781587","display_name":"Tao Chen (75911)","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Tao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130172835","display_name":"Yi Jing Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Yi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082756213","display_name":"Gensheng Pei","orcid":"https://orcid.org/0000-0002-7677-7487"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pei, Gensheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129849315","display_name":"Xiangbo Shu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shu, Xiangbo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027545344","display_name":"Yazhou Yao","orcid":"https://orcid.org/0000-0002-0337-9410"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yao, Yazhou","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5129886760","display_name":"Fumin Shen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shen, Fumin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9139999747276306,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9139999747276306,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.017400000244379044,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.010200000368058681,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/redundancy","display_name":"Redundancy (engineering)","score":0.6477000117301941},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.5735999941825867},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.4700999855995178},{"id":"https://openalex.org/keywords/parsing","display_name":"Parsing","score":0.45249998569488525},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.42089998722076416},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.41040000319480896},{"id":"https://openalex.org/keywords/orthogonalization","display_name":"Orthogonalization","score":0.3765000104904175}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8130000233650208},{"id":"https://openalex.org/C152124472","wikidata":"https://www.wikidata.org/wiki/Q1204361","display_name":"Redundancy (engineering)","level":2,"score":0.6477000117301941},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.5735999941825867},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5659000277519226},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.4700999855995178},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.45249998569488525},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.42089998722076416},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.41040000319480896},{"id":"https://openalex.org/C47559304","wikidata":"https://www.wikidata.org/wiki/Q1702189","display_name":"Orthogonalization","level":2,"score":0.3765000104904175},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.35850000381469727},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.3449000120162964},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3212999999523163},{"id":"https://openalex.org/C205606062","wikidata":"https://www.wikidata.org/wiki/Q5249645","display_name":"Decoupling (probability)","level":2,"score":0.3012000024318695},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.28209999203681946},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.2775000035762787},{"id":"https://openalex.org/C122770356","wikidata":"https://www.wikidata.org/wiki/Q1656753","display_name":"Identifiability","level":2,"score":0.27459999918937683},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.265500009059906},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.25760000944137573},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.25270000100135803}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.17520","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.17520","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.17520","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.17520","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.7149044871330261,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Recent":[0],"advances":[1],"in":[2,10,172],"vision-language":[3,70],"models":[4],"(VLMs)":[5],"have":[6],"garnered":[7],"substantial":[8],"attention":[9],"open-vocabulary":[11],"semantic":[12,88,146],"and":[13,33,43,89,128,147],"part":[14],"segmentation":[15],"(OSPS).":[16],"However,":[17],"existing":[18],"methods":[19],"extract":[20,98],"image-text":[21],"alignment":[22,71],"cues":[23],"from":[24,73,101,159],"cost":[25,55,74],"volumes":[26],"through":[27],"a":[28,50,94,106,126,135],"serial":[29],"structure":[30],"of":[31,122],"spatial":[32,44],"class":[34],"aggregations,":[35],"leading":[36],"to":[37,59,67,97,111,141,155],"knowledge":[38,124,158],"interference":[39],"between":[40,144],"class-level":[41],"semantics":[42],"context.":[45],"Therefore,":[46],"this":[47],"paper":[48],"proposes":[49],"simple":[51],"yet":[52],"effective":[53],"parallel":[54,170],"aggregation":[56],"(PCA-Seg)":[57],"paradigm":[58],"alleviate":[60],"the":[61,65,120,145,152],"above":[62],"challenge,":[63],"enabling":[64,119],"model":[66],"capture":[68],"richer":[69],"information":[72],"volumes.":[75],"Specifically,":[76],"we":[77,133],"design":[78],"an":[79],"expert-driven":[80],"perceptual":[81],"learning":[82],"(EPL)":[83],"module":[84,154],"that":[85,168],"efficiently":[86],"integrates":[87],"contextual":[90,148],"streams.":[91],"It":[92],"incorporates":[93],"multi-expert":[95],"parser":[96],"complementary":[99,123],"features":[100],"multiple":[102],"perspectives.":[103],"In":[104],"addition,":[105],"coefficient":[107],"mapper":[108],"is":[109],"designed":[110],"adaptively":[112],"learn":[113,156],"pixel-specific":[114],"weights":[115],"for":[116],"each":[117,169],"feature,":[118],"integration":[121],"into":[125],"unified":[127],"robust":[129],"feature":[130,136],"embedding.":[131],"Furthermore,":[132],"propose":[134],"orthogonalization":[137],"decoupling":[138],"(FOD)":[139],"strategy":[140],"mitigate":[142],"redundancy":[143],"streams,":[149],"which":[150],"allows":[151],"EPL":[153],"diverse":[157],"orthogonalized":[160],"features.":[161],"Extensive":[162],"experiments":[163],"on":[164],"eight":[165],"benchmarks":[166],"show":[167],"block":[171],"PCA-Seg":[173],"adds":[174],"merely":[175],"0.35M":[176],"parameters":[177],"while":[178],"achieving":[179],"state-of-the-art":[180],"OSPS":[181],"performance.":[182]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-20T00:00:00"}
