{"id":"https://openalex.org/W4394838869","doi":"https://doi.org/10.1145/3652583.3658095","title":"Calibration &amp; Reconstruction: Deeply Integrated Language for Referring Image Segmentation","display_name":"Calibration &amp; Reconstruction: Deeply Integrated Language for Referring Image Segmentation","publication_year":2024,"publication_date":"2024-05-30","ids":{"openalex":"https://openalex.org/W4394838869","doi":"https://doi.org/10.1145/3652583.3658095"},"language":"en","primary_location":{"id":"doi:10.1145/3652583.3658095","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3652583.3658095","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3652583.3658095","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3652583.3658095","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103163018","display_name":"Yichen Yan","orcid":"https://orcid.org/0009-0009-7620-4214"},"institutions":[{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yichen Yan","raw_affiliation_strings":["Institute of Automation, Chinese Academy of Sciences &amp; School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0009-7620-4214","affiliations":[{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences &amp; School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101777772","display_name":"Xingjian He","orcid":"https://orcid.org/0000-0001-5396-6253"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xingjian He","raw_affiliation_strings":["Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-5396-6253","affiliations":[{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067083698","display_name":"Sihan Chen","orcid":"https://orcid.org/0009-0001-3539-8085"},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Sihan Chen","raw_affiliation_strings":["School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0001-3539-8085","affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4210165038"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5108392430","display_name":"Jing Liu","orcid":"https://orcid.org/0000-0003-0903-9131"},"institutions":[{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jing Liu","raw_affiliation_strings":["Institute of Automation, Chinese Academy of Sciences &amp; School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0003-0903-9131","affiliations":[{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences &amp; School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I4210165038"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5103163018"],"corresponding_institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I4210165038"],"apc_list":null,"apc_paid":null,"fwci":0.2381,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.46307373,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"451","last_page":"459"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8438727855682373},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.7114077806472778},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.5707454085350037},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.5174987316131592},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4803515672683716},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4576612114906311},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.4270875155925751},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.40127164125442505},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.17790108919143677},{"id":"https://openalex.org/keywords/voltage","display_name":"Voltage","score":0.11546915769577026}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8438727855682373},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.7114077806472778},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.5707454085350037},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.5174987316131592},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4803515672683716},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4576612114906311},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.4270875155925751},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.40127164125442505},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.17790108919143677},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.11546915769577026},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3652583.3658095","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3652583.3658095","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3652583.3658095","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2404.08281","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2404.08281","pdf_url":"https://arxiv.org/pdf/2404.08281","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"doi:10.1145/3652583.3658095","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3652583.3658095","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3652583.3658095","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.75,"display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G1283858192","display_name":null,"funder_award_id":"U21B2043","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2244573216","display_name":null,"funder_award_id":"62206279","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6012944917","display_name":null,"funder_award_id":"U21B2043,62206279","funder_id":"https://openalex.org/F4320323817","funder_display_name":"Universitas Brawijaya"},{"id":"https://openalex.org/G7956454494","display_name":null,"funder_award_id":"2022ZD0118801","funder_id":"https://openalex.org/F4320323817","funder_display_name":"Universitas Brawijaya"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320323817","display_name":"Universitas Brawijaya","ror":"https://ror.org/01wk3d929"},{"id":"https://openalex.org/F4320329860","display_name":"National Science and Technology Major Project","ror":null}],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4394838869.pdf"},"referenced_works_count":23,"referenced_works":["https://openalex.org/W1861492603","https://openalex.org/W2302548814","https://openalex.org/W2489434015","https://openalex.org/W2605127024","https://openalex.org/W2798556392","https://openalex.org/W2963109634","https://openalex.org/W2964284374","https://openalex.org/W2964345792","https://openalex.org/W2964935470","https://openalex.org/W3035097537","https://openalex.org/W3093025045","https://openalex.org/W3096609285","https://openalex.org/W3138516171","https://openalex.org/W3170544306","https://openalex.org/W3172522282","https://openalex.org/W3187664142","https://openalex.org/W3211490618","https://openalex.org/W3216551675","https://openalex.org/W4200631575","https://openalex.org/W4307504011","https://openalex.org/W4312543911","https://openalex.org/W4377711491","https://openalex.org/W4393156126"],"related_works":["https://openalex.org/W4288365749","https://openalex.org/W2936497627","https://openalex.org/W3013624417","https://openalex.org/W4287826556","https://openalex.org/W3098382480","https://openalex.org/W4287598411","https://openalex.org/W3100913109","https://openalex.org/W3198458223","https://openalex.org/W3126642501","https://openalex.org/W2964413124"],"abstract_inverted_index":{"Referring":[0],"image":[1],"segmentation":[2],"aims":[3],"to":[4,9,32,41,57,130,167,206],"segment":[5],"an":[6,15],"object":[7],"referred":[8],"by":[10,79,108],"natural":[11],"language":[12,81,111,128,141,171,182],"expression":[13],"from":[14,29,160,184],"image.":[16],"The":[17],"primary":[18],"challenge":[19],"lies":[20],"in":[21,73],"the":[22,74,90,102,109,114,119,126,140,161,165,169,174,181,193],"efficient":[23],"propagation":[24],"of":[25,89,121,164,196],"fine-grained":[26],"semantic":[27],"information":[28,52,183],"textual":[30],"features":[31,72,104,129],"visual":[33],"features.":[34,112,142],"Many":[35],"recent":[36],"works":[37],"utilize":[38],"a":[39,66,96,148,153],"Transformer":[40],"address":[42],"this":[43,61],"challenge.":[44],"However,":[45],"conventional":[46],"transformer":[47,75],"decoders":[48],"can":[49,105,178],"distort":[50],"linguistic":[51],"with":[53],"deeper":[54],"layers,":[55],"leading":[56],"suboptimal":[58],"results.":[59],"In":[60,113],"paper,":[62],"we":[63,94,117,146],"introduce":[64,147],"CRFormer,":[65],"model":[67],"that":[68],"iteratively":[69,106],"calibrates":[70],"multi-modal":[71,103],"decoder.":[76],"We":[77],"start":[78],"generating":[80],"queries":[82,133,159],"using":[83],"vision":[84],"features,":[85],"emphasizing":[86],"different":[87],"aspects":[88],"input":[91,110,170],"language.":[92],"Then,":[93],"propose":[95],"novel":[97],"Calibration":[98,115],"Decoder":[99],"(CDec)":[100],"wherein":[101],"calibrated":[107],"Decoder,":[116],"use":[118],"output":[120],"each":[122],"decoder":[123,166],"layer":[124,163],"and":[125,152,172,202],"original":[127],"generate":[131],"new":[132],"for":[134],"continuous":[135],"calibration,":[136],"which":[137],"gradually":[138],"updates":[139],"Based":[143],"on":[144],"CDec,":[145],"Language":[149],"Reconstruction":[150],"Module":[151],"reconstruction":[154,175],"loss.":[155,176],"This":[156,177],"module":[157],"leverages":[158],"final":[162],"reconstruct":[168],"compute":[173],"further":[179],"prevent":[180],"being":[185],"lost":[186],"or":[187],"distorted.":[188],"Our":[189],"experiments":[190],"consistently":[191],"show":[192],"superior":[194],"performance":[195],"our":[197],"approach":[198],"across":[199],"RefCOCO,":[200],"RefCOCO+,":[201],"G-Ref":[203],"datasets":[204],"compared":[205],"state-of-the-art":[207],"methods.":[208]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-03-11T06:11:40.159057","created_date":"2024-04-16T00:00:00"}
