{"id":"https://openalex.org/W4402981691","doi":"https://doi.org/10.1109/icme57554.2024.10687964","title":"Language-Guided Semantic Alignment for Co-saliency Detection","display_name":"Language-Guided Semantic Alignment for Co-saliency Detection","publication_year":2024,"publication_date":"2024-07-15","ids":{"openalex":"https://openalex.org/W4402981691","doi":"https://doi.org/10.1109/icme57554.2024.10687964"},"language":"en","primary_location":{"id":"doi:10.1109/icme57554.2024.10687964","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme57554.2024.10687964","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5113730802","display_name":"Chuang Ding","orcid":null},"institutions":[{"id":"https://openalex.org/I200845125","display_name":"Nanjing University of Information Science and Technology","ror":"https://ror.org/02y0rxk19","country_code":"CN","type":"education","lineage":["https://openalex.org/I200845125"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Chuang Ding","raw_affiliation_strings":["Nanjing University of Information Science and Technology,Nanjing,China"],"affiliations":[{"raw_affiliation_string":"Nanjing University of Information Science and Technology,Nanjing,China","institution_ids":["https://openalex.org/I200845125"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047915254","display_name":"Yang Wu","orcid":"https://orcid.org/0000-0001-7842-9796"},"institutions":[{"id":"https://openalex.org/I200845125","display_name":"Nanjing University of Information Science and Technology","ror":"https://ror.org/02y0rxk19","country_code":"CN","type":"education","lineage":["https://openalex.org/I200845125"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yang Wu","raw_affiliation_strings":["Nanjing University of Information Science and Technology,Nanjing,China"],"affiliations":[{"raw_affiliation_string":"Nanjing University of Information Science and Technology,Nanjing,China","institution_ids":["https://openalex.org/I200845125"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100775396","display_name":"Huihui Song","orcid":"https://orcid.org/0000-0002-7275-9871"},"institutions":[{"id":"https://openalex.org/I200845125","display_name":"Nanjing University of Information Science and Technology","ror":"https://ror.org/02y0rxk19","country_code":"CN","type":"education","lineage":["https://openalex.org/I200845125"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huihui Song","raw_affiliation_strings":["Nanjing University of Information Science and Technology,Nanjing,China"],"affiliations":[{"raw_affiliation_string":"Nanjing University of Information Science and Technology,Nanjing,China","institution_ids":["https://openalex.org/I200845125"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075412755","display_name":"Kaihua Zhang","orcid":"https://orcid.org/0000-0002-1613-3401"},"institutions":[{"id":"https://openalex.org/I200845125","display_name":"Nanjing University of Information Science and Technology","ror":"https://ror.org/02y0rxk19","country_code":"CN","type":"education","lineage":["https://openalex.org/I200845125"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kaihua Zhang","raw_affiliation_strings":["Nanjing University of Information Science and Technology,Nanjing,China"],"affiliations":[{"raw_affiliation_string":"Nanjing University of Information Science and Technology,Nanjing,China","institution_ids":["https://openalex.org/I200845125"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100320157","display_name":"Xu Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210092223","display_name":"Suzhou Vocational University","ror":"https://ror.org/00hn8pj83","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210092223"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xu Zhang","raw_affiliation_strings":["Suzhou Vocational University,Suzhou,China"],"affiliations":[{"raw_affiliation_string":"Suzhou Vocational University,Suzhou,China","institution_ids":["https://openalex.org/I4210092223"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5055656398","display_name":"Zhenhua Guo","orcid":"https://orcid.org/0000-0002-1303-6681"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhenhua Guo","raw_affiliation_strings":["Tianyijiaotong Technology Ltd.,China"],"affiliations":[{"raw_affiliation_string":"Tianyijiaotong Technology Ltd.,China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5113730802"],"corresponding_institution_ids":["https://openalex.org/I200845125"],"apc_list":null,"apc_paid":null,"fwci":0.5248,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.66303008,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.9387000203132629,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.9387000203132629,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7987731695175171},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6164966821670532},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5488291382789612},{"id":"https://openalex.org/keywords/semantic-computing","display_name":"Semantic computing","score":0.45368799567222595},{"id":"https://openalex.org/keywords/co-occurrence","display_name":"Co-occurrence","score":0.41869738698005676},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.32697391510009766},{"id":"https://openalex.org/keywords/semantic-web","display_name":"Semantic Web","score":0.11137527227401733}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7987731695175171},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6164966821670532},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5488291382789612},{"id":"https://openalex.org/C511149849","wikidata":"https://www.wikidata.org/wiki/Q7449051","display_name":"Semantic computing","level":3,"score":0.45368799567222595},{"id":"https://openalex.org/C154290570","wikidata":"https://www.wikidata.org/wiki/Q1756768","display_name":"Co-occurrence","level":2,"score":0.41869738698005676},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.32697391510009766},{"id":"https://openalex.org/C2129575","wikidata":"https://www.wikidata.org/wiki/Q54837","display_name":"Semantic Web","level":2,"score":0.11137527227401733}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icme57554.2024.10687964","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme57554.2024.10687964","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.44999998807907104,"display_name":"Reduced inequalities"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320337495","display_name":"Technology Development","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W1522973599","https://openalex.org/W2100470808","https://openalex.org/W2342491128","https://openalex.org/W2560023338","https://openalex.org/W2904945062","https://openalex.org/W2963529609","https://openalex.org/W2963834057","https://openalex.org/W2963868681","https://openalex.org/W3034499925","https://openalex.org/W3035666869","https://openalex.org/W3039991645","https://openalex.org/W3096289386","https://openalex.org/W3173349970","https://openalex.org/W3174178235","https://openalex.org/W3202242435","https://openalex.org/W4221145551","https://openalex.org/W4291653109","https://openalex.org/W4313421761","https://openalex.org/W4362601361","https://openalex.org/W4362654014","https://openalex.org/W4372265872","https://openalex.org/W4386065895","https://openalex.org/W4386076585","https://openalex.org/W4386159059","https://openalex.org/W4387682353","https://openalex.org/W4390874575","https://openalex.org/W4391174825","https://openalex.org/W6631190155","https://openalex.org/W6791353385"],"related_works":["https://openalex.org/W3188962172","https://openalex.org/W3204019825","https://openalex.org/W1972465254","https://openalex.org/W2004663191","https://openalex.org/W4327772221","https://openalex.org/W2096831027","https://openalex.org/W2123558490","https://openalex.org/W2102525572","https://openalex.org/W2060049802","https://openalex.org/W4243232375"],"abstract_inverted_index":{"Previous":[0],"pure":[1],"vision":[2],"paradigm":[3,63],"for":[4,64,73,107,122,175],"co-saliency":[5],"detection":[6],"(COD)":[7],"predominantly":[8],"employs":[9],"supervised":[10],"training.":[11],"The":[12,66],"supervisory":[13],"signals":[14],"often":[15,36],"consist":[16],"of":[17,23,91,236,242],"binary":[18],"masks":[19,24],"or":[20],"a":[21,58,112,204,240],"combination":[22],"and":[25,78,93,140,185,218,230],"category":[26],"labels.":[27],"However,":[28],"constrained":[29],"by":[30],"limited":[31],"training":[32,213],"samples,":[33],"these":[34],"models":[35],"suffer":[37],"from":[38,148],"overfitting":[39],"issue,":[40],"struggling":[41],"to":[42,44,70,97,150,161,194,239],"generalize":[43],"unseen":[45],"samples.":[46],"To":[47],"this":[48,50],"end,":[49],"paper":[51],"presents":[52],"the":[53,81,86,102,127,131,138,144,190,196,211,233],"constrative":[54],"language-image":[55],"pretraining-COD":[56],"(CLIP-COD),":[57],"novel":[59],"language-guided":[60],"semantic":[61,113,146],"alignment":[62,82,114],"COD.":[65,108],"primary":[67],"objective":[68],"is":[69],"leverage":[71,85],"CLIP":[72,92,149],"aligning":[74],"concepts":[75],"between":[76,137],"language":[77,88,139,183],"images,":[79],"where":[80],"can":[83,118,129,209],"effectively":[84],"powerful":[87,145],"understanding":[89],"capability":[90],"transfer":[94,205],"its":[95],"knowledge":[96,121,147,167],"image":[98,141],"domain,":[99],"thereby":[100],"enhancing":[101],"model\u2019s":[103],"zero-shot":[104],"generalization":[105],"ability":[106],"Firstly,":[109],"we":[110,154,180,201],"propose":[111],"branch":[115],"(SAB)":[116],"that":[117,164],"learn":[119],"rich":[120],"comprehending":[123],"images":[124],"globally.":[125],"Meanwhile,":[126],"SAB":[128],"narrow":[130],"gap":[132],"in":[133],"high-dimensional":[134],"feature":[135],"space":[136],"features,":[142],"transferring":[143],"our":[151,237],"model.":[152],"Subsequently,":[153],"devise":[155],"an":[156],"intra-group":[157],"multi-fusion":[158],"module":[159],"(IMM)":[160],"capture":[162],"features":[163],"integrate":[165],"group":[166],"as":[168],"dense":[169,186],"prompts,":[170],"providing":[171],"spatial":[172],"localization":[173],"information":[174],"subsequent":[176],"fine":[177],"segmentation.":[178],"Finally,":[179],"input":[181],"sparse":[182],"prompts":[184],"mask":[187],"cues":[188],"into":[189],"pre-trained":[191],"SAM":[192],"decoder":[193],"obtain":[195],"final":[197],"COD":[198],"results.":[199],"Additionally,":[200],"further":[202],"design":[203],"optimization":[206],"adaptor,":[207],"which":[208],"reduce":[210],"model":[212],"scale,":[214],"saving":[215],"computing":[216],"resource":[217],"cost":[219],"greatly.":[220],"Extensive":[221],"experiments":[222],"on":[223],"three":[224],"benchmark":[225],"datasets,":[226],"including":[227],"CoSal2015,":[228],"CoCA,":[229],"CoSOD3k,":[231],"demonstrate":[232],"superior":[234],"performance":[235],"CLIP-COD":[238],"variety":[241],"state-of-the-art":[243],"methods.":[244]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2025-12-22T23:10:17.713674","created_date":"2025-10-10T00:00:00"}
