{"id":"https://openalex.org/W4392152057","doi":"https://doi.org/10.1109/tmm.2024.3369922","title":"UniTR: A Unified TRansformer-Based Framework for Co-Object and Multi-Modal Saliency Detection","display_name":"UniTR: A Unified TRansformer-Based Framework for Co-Object and Multi-Modal Saliency Detection","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4392152057","doi":"https://doi.org/10.1109/tmm.2024.3369922"},"language":"en","primary_location":{"id":"doi:10.1109/tmm.2024.3369922","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2024.3369922","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5088611866","display_name":"Ruohao Guo","orcid":"https://orcid.org/0000-0002-1091-272X"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Ruohao Guo","raw_affiliation_strings":["National Key Laboratory of General Artificial Intelligence, School of Intelligence Science and Technology, Peking University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-1091-272X","affiliations":[{"raw_affiliation_string":"National Key Laboratory of General Artificial Intelligence, School of Intelligence Science and Technology, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015981200","display_name":"Xianghua Ying","orcid":"https://orcid.org/0000-0002-9785-0727"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xianghua Ying","raw_affiliation_strings":["National Key Laboratory of General Artificial Intelligence, School of Intelligence Science and Technology, Peking University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-9785-0727","affiliations":[{"raw_affiliation_string":"National Key Laboratory of General Artificial Intelligence, School of Intelligence Science and Technology, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054577986","display_name":"Yanyu Qi","orcid":"https://orcid.org/0009-0008-9931-7855"},"institutions":[{"id":"https://openalex.org/I52158045","display_name":"China Agricultural University","ror":"https://ror.org/04v3ywz14","country_code":"CN","type":"education","lineage":["https://openalex.org/I52158045"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yanyu Qi","raw_affiliation_strings":["College of Information and Electrical Engineering, China Agricultural University, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0008-9931-7855","affiliations":[{"raw_affiliation_string":"College of Information and Electrical Engineering, China Agricultural University, Beijing, China","institution_ids":["https://openalex.org/I52158045"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5046273191","display_name":"Liao Qu","orcid":"https://orcid.org/0000-0001-5228-0348"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Liao Qu","raw_affiliation_strings":["Electrical and Computer Engineering Department, Carnegie Mellon University, Pittsburgh, PA, USA","Electrical and Computer Engineering department, Carnegie Mellon University, Pittsburgh, PA, USA"],"raw_orcid":"https://orcid.org/0000-0001-5228-0348","affiliations":[{"raw_affiliation_string":"Electrical and Computer Engineering Department, Carnegie Mellon University, Pittsburgh, PA, USA","institution_ids":["https://openalex.org/I74973139"]},{"raw_affiliation_string":"Electrical and Computer Engineering department, Carnegie Mellon University, Pittsburgh, PA, USA","institution_ids":["https://openalex.org/I74973139"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5088611866"],"corresponding_institution_ids":["https://openalex.org/I20231570"],"apc_list":null,"apc_paid":null,"fwci":7.2326,"has_fulltext":false,"cited_by_count":32,"citation_normalized_percentile":{"value":0.9795993,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":"26","issue":null,"first_page":"7622","last_page":"7635"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11094","display_name":"Face Recognition and Perception","score":0.9825000166893005,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11659","display_name":"Advanced Image Fusion Techniques","score":0.9819999933242798,"subfield":{"id":"https://openalex.org/subfields/2214","display_name":"Media Technology"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8441718220710754},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.6030970811843872},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5482932329177856},{"id":"https://openalex.org/keywords/object-detection","display_name":"Object detection","score":0.4350036084651947},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.34758275747299194},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.29049041867256165},{"id":"https://openalex.org/keywords/voltage","display_name":"Voltage","score":0.17284637689590454},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.079962819814682}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8441718220710754},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.6030970811843872},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5482932329177856},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.4350036084651947},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.34758275747299194},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.29049041867256165},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.17284637689590454},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.079962819814682},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tmm.2024.3369922","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2024.3369922","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1021494965","display_name":null,"funder_award_id":"62371009","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4268905062","display_name":null,"funder_award_id":"61971008","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":132,"referenced_works":["https://openalex.org/W20683899","https://openalex.org/W1522734439","https://openalex.org/W1861492603","https://openalex.org/W1914179642","https://openalex.org/W1964884769","https://openalex.org/W1976409045","https://openalex.org/W1993713494","https://openalex.org/W1996140089","https://openalex.org/W2031489346","https://openalex.org/W2037227137","https://openalex.org/W2076756823","https://openalex.org/W2100470808","https://openalex.org/W2109255472","https://openalex.org/W2110019070","https://openalex.org/W2138682569","https://openalex.org/W2194775991","https://openalex.org/W2342491128","https://openalex.org/W2412782625","https://openalex.org/W2415053570","https://openalex.org/W2470139095","https://openalex.org/W2520640394","https://openalex.org/W2560023338","https://openalex.org/W2565639579","https://openalex.org/W2740667773","https://openalex.org/W2747199123","https://openalex.org/W2750988638","https://openalex.org/W2765838470","https://openalex.org/W2793029440","https://openalex.org/W2807912089","https://openalex.org/W2887486131","https://openalex.org/W2887522866","https://openalex.org/W2894890793","https://openalex.org/W2895340898","https://openalex.org/W2896011443","https://openalex.org/W2910628332","https://openalex.org/W2928165649","https://openalex.org/W2939217524","https://openalex.org/W2942049721","https://openalex.org/W2942939335","https://openalex.org/W2943125866","https://openalex.org/W2945809413","https://openalex.org/W2948937967","https://openalex.org/W2955813853","https://openalex.org/W2957414648","https://openalex.org/W2961348656","https://openalex.org/W2962772649","https://openalex.org/W2963529609","https://openalex.org/W2963868681","https://openalex.org/W2964283970","https://openalex.org/W2964429685","https://openalex.org/W2965638232","https://openalex.org/W2969626490","https://openalex.org/W2972640707","https://openalex.org/W2984122511","https://openalex.org/W2984144959","https://openalex.org/W2990844506","https://openalex.org/W2990984982","https://openalex.org/W2991487804","https://openalex.org/W2993182889","https://openalex.org/W2997217064","https://openalex.org/W2997788879","https://openalex.org/W3011305844","https://openalex.org/W3016163669","https://openalex.org/W3034320133","https://openalex.org/W3034681942","https://openalex.org/W3035284915","https://openalex.org/W3035357085","https://openalex.org/W3035666869","https://openalex.org/W3035687312","https://openalex.org/W3039479109","https://openalex.org/W3039991645","https://openalex.org/W3044364325","https://openalex.org/W3045052737","https://openalex.org/W3096289386","https://openalex.org/W3096609285","https://openalex.org/W3097053213","https://openalex.org/W3097336090","https://openalex.org/W3106587394","https://openalex.org/W3107497254","https://openalex.org/W3108043693","https://openalex.org/W3108608656","https://openalex.org/W3108812909","https://openalex.org/W3108822985","https://openalex.org/W3114152269","https://openalex.org/W3119667975","https://openalex.org/W3126725132","https://openalex.org/W3127947687","https://openalex.org/W3136965813","https://openalex.org/W3138516171","https://openalex.org/W3139633126","https://openalex.org/W3140528754","https://openalex.org/W3151130473","https://openalex.org/W3159018159","https://openalex.org/W3160566314","https://openalex.org/W3163132162","https://openalex.org/W3166092877","https://openalex.org/W3171516518","https://openalex.org/W3171757599","https://openalex.org/W3172675153","https://openalex.org/W3173349970","https://openalex.org/W3173882198","https://openalex.org/W3174178235","https://openalex.org/W3177040887","https://openalex.org/W3185043317","https://openalex.org/W3188963955","https://openalex.org/W3202242435","https://openalex.org/W3204397973","https://openalex.org/W3211246039","https://openalex.org/W3215790634","https://openalex.org/W4205288538","https://openalex.org/W4206715824","https://openalex.org/W4210786150","https://openalex.org/W4214542306","https://openalex.org/W4214654781","https://openalex.org/W4226305381","https://openalex.org/W4285161446","https://openalex.org/W4291653109","https://openalex.org/W4312372834","https://openalex.org/W4312509967","https://openalex.org/W4312567319","https://openalex.org/W4312815172","https://openalex.org/W4313160444","https://openalex.org/W4320008793","https://openalex.org/W4362654014","https://openalex.org/W4385245566","https://openalex.org/W4387187602","https://openalex.org/W6637373629","https://openalex.org/W6784094891","https://openalex.org/W6784930956","https://openalex.org/W6785582761","https://openalex.org/W6790690058","https://openalex.org/W6794295097"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2883677709","https://openalex.org/W4312842780"],"abstract_inverted_index":{"Recent":[0],"years":[1],"have":[2],"witnessed":[3],"a":[4,24,50,65,88,92,117,135],"growing":[5],"interest":[6],"in":[7],"co-object":[8],"segmentation":[9,113],"and":[10,53,146,149,168],"multi-modal":[11],"salient":[12,30],"object":[13],"detection.":[14],"Many":[15],"efforts":[16],"are":[17],"devoted":[18],"to":[19,49,57,98,129,142],"segmenting":[20],"co-existed":[21],"objects":[22,31,104],"among":[23],"group":[25],"of":[26,44,102],"images":[27],"or":[28,105,126],"detecting":[29],"from":[32,107],"different":[33,108],"modalities.":[34,109],"Albeit":[35],"the":[36,83,100,123],"appreciable":[37],"performance":[38],"achieved":[39],"on":[40,160],"respective":[41],"benchmarks,":[42],"each":[43],"these":[45],"methods":[46],"is":[47,96,140],"limited":[48],"specific":[51],"task":[52],"cannot":[54],"be":[55],"generalized":[56],"other":[58],"tasks.":[59],"In":[60],"this":[61],"paper,":[62],"we":[63,115],"develop":[64],"<bold":[66,70,76,161,164],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[67,71,77,162,165],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">Uni</b>":[68],"fied":[69],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">TR</b>":[72],"ansformer-based":[73],"framework,":[74],"namely":[75],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">UniTR</b>":[78],",":[79,167],"aiming":[80],"at":[81],"tackling":[82],"above":[84],"tasks":[85],"individually":[86],"with":[87],"unified":[89],"architecture.":[90],"Specifically,":[91],"transformer":[93],"module":[94,138],"(CoFormer)":[95],"introduced":[97],"learn":[99],"consistency":[101],"relevant":[103],"complementarity":[106],"To":[110],"generate":[111],"high-quality":[112],"maps,":[114],"adopt":[116],"dual-stream":[118],"decoding":[119],"paradigm":[120],"that":[121,155],"allows":[122],"extracted":[124],"consistent":[125],"complementary":[127],"information":[128],"better":[130],"guide":[131],"mask":[132],"prediction.":[133],"Moreover,":[134],"feature":[136],"fusion":[137],"(ZoomFormer)":[139],"designed":[141],"enhance":[143],"backbone":[144],"features":[145],"capture":[147],"multi-granularity":[148],"multi-semantic":[150],"information.":[151],"Extensive":[152],"experiments":[153],"show":[154],"our":[156],"UniTR":[157],"performs":[158],"well":[159],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">17</b>":[163],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">benchmarks</b>":[166],"surpasses":[169],"existing":[170],"state-of-the-art":[171],"approaches.":[172]},"counts_by_year":[{"year":2026,"cited_by_count":5},{"year":2025,"cited_by_count":21},{"year":2024,"cited_by_count":6}],"updated_date":"2026-05-20T08:49:12.498775","created_date":"2025-10-10T00:00:00"}
