{"id":"https://openalex.org/W4402353548","doi":"https://doi.org/10.1109/ijcnn60899.2024.10650132","title":"SC-ViT: Semantic Contrast Vision Transformer for Scene Recognition","display_name":"SC-ViT: Semantic Contrast Vision Transformer for Scene Recognition","publication_year":2024,"publication_date":"2024-06-30","ids":{"openalex":"https://openalex.org/W4402353548","doi":"https://doi.org/10.1109/ijcnn60899.2024.10650132"},"language":"en","primary_location":{"id":"doi:10.1109/ijcnn60899.2024.10650132","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/ijcnn60899.2024.10650132","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5024227794","display_name":"Jiahui Niu","orcid":"https://orcid.org/0009-0004-9205-2242"},"institutions":[{"id":"https://openalex.org/I154099455","display_name":"Shandong University","ror":"https://ror.org/0207yh398","country_code":"CN","type":"education","lineage":["https://openalex.org/I154099455"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jiahui Niu","raw_affiliation_strings":["Shandong University,School of Control Science and Engineering,Jinan,China"],"affiliations":[{"raw_affiliation_string":"Shandong University,School of Control Science and Engineering,Jinan,China","institution_ids":["https://openalex.org/I154099455"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113292484","display_name":"Xin Ma","orcid":null},"institutions":[{"id":"https://openalex.org/I154099455","display_name":"Shandong University","ror":"https://ror.org/0207yh398","country_code":"CN","type":"education","lineage":["https://openalex.org/I154099455"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xin Ma","raw_affiliation_strings":["Shandong University,School of Control Science and Engineering,Jinan,China"],"affiliations":[{"raw_affiliation_string":"Shandong University,School of Control Science and Engineering,Jinan,China","institution_ids":["https://openalex.org/I154099455"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5083624242","display_name":"Rui Li","orcid":"https://orcid.org/0009-0000-9992-4067"},"institutions":[{"id":"https://openalex.org/I4210144143","display_name":"Inspur (China)","ror":"https://ror.org/0474p4r72","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210144143"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Rui Li","raw_affiliation_strings":["Inspur Academy of Science and Technology,Jinan,China"],"affiliations":[{"raw_affiliation_string":"Inspur Academy of Science and Technology,Jinan,China","institution_ids":["https://openalex.org/I4210144143"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5024227794"],"corresponding_institution_ids":["https://openalex.org/I154099455"],"apc_list":null,"apc_paid":null,"fwci":0.275,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.53352449,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"34","issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6966947317123413},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5938330888748169},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5778383016586304},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.5716750621795654},{"id":"https://openalex.org/keywords/contrast","display_name":"Contrast (vision)","score":0.5311530828475952},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.12239786982536316},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.09062811732292175},{"id":"https://openalex.org/keywords/voltage","display_name":"Voltage","score":0.05913341045379639}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6966947317123413},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5938330888748169},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5778383016586304},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5716750621795654},{"id":"https://openalex.org/C2776502983","wikidata":"https://www.wikidata.org/wiki/Q690182","display_name":"Contrast (vision)","level":2,"score":0.5311530828475952},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.12239786982536316},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.09062811732292175},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.05913341045379639}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn60899.2024.10650132","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/ijcnn60899.2024.10650132","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335787","display_name":"Fundamental Research Funds for the Central Universities","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":52,"referenced_works":["https://openalex.org/W591046810","https://openalex.org/W2017814585","https://openalex.org/W2108598243","https://openalex.org/W2118259358","https://openalex.org/W2152161678","https://openalex.org/W2194775991","https://openalex.org/W2258484932","https://openalex.org/W2517292647","https://openalex.org/W2618530766","https://openalex.org/W2620629206","https://openalex.org/W2755125693","https://openalex.org/W2896457183","https://openalex.org/W2913668833","https://openalex.org/W2950487307","https://openalex.org/W2962858109","https://openalex.org/W2990341055","https://openalex.org/W3004301609","https://openalex.org/W3013004221","https://openalex.org/W3017161914","https://openalex.org/W3094502228","https://openalex.org/W3101522362","https://openalex.org/W3102084631","https://openalex.org/W3105627154","https://openalex.org/W3128592650","https://openalex.org/W3175515048","https://openalex.org/W3184735396","https://openalex.org/W3201623325","https://openalex.org/W3207506706","https://openalex.org/W4200000144","https://openalex.org/W4205284827","https://openalex.org/W4210747134","https://openalex.org/W4214493665","https://openalex.org/W4229042118","https://openalex.org/W4280496682","https://openalex.org/W4285102229","https://openalex.org/W4293680532","https://openalex.org/W4293811919","https://openalex.org/W4297775537","https://openalex.org/W4382982491","https://openalex.org/W4385245566","https://openalex.org/W4385482612","https://openalex.org/W4400027016","https://openalex.org/W4402162264","https://openalex.org/W6737664043","https://openalex.org/W6755207826","https://openalex.org/W6770666417","https://openalex.org/W6784333009","https://openalex.org/W6789753369","https://openalex.org/W6791353385","https://openalex.org/W6798805250","https://openalex.org/W6811072154","https://openalex.org/W6838393215"],"related_works":["https://openalex.org/W2058170566","https://openalex.org/W2755342338","https://openalex.org/W2772917594","https://openalex.org/W2775347418","https://openalex.org/W2166024367","https://openalex.org/W3116076068","https://openalex.org/W2229312674","https://openalex.org/W2951359407","https://openalex.org/W2079911747","https://openalex.org/W1969923398"],"abstract_inverted_index":{"Scene":[0],"recognition":[1,27],"remains":[2],"a":[3,128,158],"challenging":[4],"task":[5],"in":[6,44,127,200],"image":[7,46],"recognition.":[8,202],"Despite":[9],"the":[10,19,45,54,79,84,118,132,164,180,185],"remarkable":[11],"advances":[12],"made":[13],"by":[14],"deep":[15],"learning,":[16],"especially":[17],"with":[18,123,141,145],"emergence":[20],"of":[21,37,57,131,167],"Convolutional":[22],"Neural":[23],"Networks":[24],"(CNNs),":[25],"scene":[26,38,58,201],"continues":[28],"to":[29,34,64,148,178],"face":[30],"unresolved":[31],"issues.":[32],"Due":[33],"high":[35],"complexity":[36],"images,":[39],"merely":[40],"identifying":[41],"several":[42],"objects":[43],"is":[47,105],"insufficient":[48],"for":[49],"obtaining":[50],"accurate":[51],"results.":[52],"Furthermore,":[53],"wide":[55],"range":[56],"categories":[59],"makes":[60],"single-modality":[61],"learning":[62,160],"susceptible":[63],"confusion.":[65],"To":[66],"address":[67],"these":[68],"challenges,":[69],"we":[70,135],"propose":[71],"an":[72],"end-to-end":[73],"multimodal":[74],"network,":[75],"SC-ViT,":[76],"based":[77],"on":[78,184],"Vision":[80],"Transformer":[81],"(ViT).":[82],"Leveraging":[83],"powerful":[85],"self-attention":[86,142],"mechanism,":[87],"our":[88],"model":[89],"captures":[90],"visual":[91],"and":[92,99,114,154,169,175,188],"contextual":[93],"cues":[94],"from":[95,151],"both":[96],"RGB":[97,124,152,168],"images":[98,125,153],"semantic":[100,103,108,121,155,170],"information.":[101,156],"The":[102],"information":[104,113,122],"derived":[106],"through":[107],"segmentation,":[109],"encompassing":[110],"object":[111],"category":[112],"spatial":[115],"layout":[116],"within":[117],"scene.":[119,133,181],"Combining":[120],"results":[126],"comprehensive":[129],"representation":[130],"Specifically,":[134],"utilize":[136],"two":[137],"branches,":[138],"each":[139],"equipped":[140],"mechanisms":[143],"but":[144],"different":[146],"structures,":[147],"extract":[149],"features":[150],"Through":[157],"contrastive":[159],"framework,":[161],"SC-ViT":[162,193],"aligns":[163],"feature":[165],"representations":[166],"modalities,":[171],"enhancing":[172],"their":[173],"consistency":[174],"discriminative":[176],"power":[177],"express":[179],"Experimental":[182],"evaluations":[183],"MIT":[186],"Indoor67":[187],"SUN397":[189],"datasets":[190],"demonstrate":[191],"that":[192],"outperforms":[194],"state-of-the-art":[195],"methods,":[196],"achieving":[197],"significant":[198],"improvements":[199]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-12-27T23:08:20.325037","created_date":"2025-10-10T00:00:00"}
