{"id":"https://openalex.org/W4416735359","doi":"https://doi.org/10.1145/3777449","title":"Implement Referring Expression Comprehension by Extending Auto-focus Lens to Locked Vision Model","display_name":"Implement Referring Expression Comprehension by Extending Auto-focus Lens to Locked Vision Model","publication_year":2025,"publication_date":"2025-11-27","ids":{"openalex":"https://openalex.org/W4416735359","doi":"https://doi.org/10.1145/3777449"},"language":"en","primary_location":{"id":"doi:10.1145/3777449","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3777449","pdf_url":null,"source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5027040096","display_name":"Shiyi Zheng","orcid":"https://orcid.org/0009-0004-0101-6597"},"institutions":[{"id":"https://openalex.org/I150807315","display_name":"Guangxi University","ror":"https://ror.org/02c9qn167","country_code":"CN","type":"education","lineage":["https://openalex.org/I150807315"]},{"id":"https://openalex.org/I180726961","display_name":"Shenzhen University","ror":"https://ror.org/01vy4gh70","country_code":"CN","type":"education","lineage":["https://openalex.org/I180726961"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shiyi Zheng","raw_affiliation_strings":["School of Electrical Engineering, Guangxi University, Nanning, China and College of Computer Science and Software Engineering, Shenzhen University, Shenzhen, China","Guangxi University, China"],"raw_orcid":"https://orcid.org/0009-0004-0101-6597","affiliations":[{"raw_affiliation_string":"School of Electrical Engineering, Guangxi University, Nanning, China and College of Computer Science and Software Engineering, Shenzhen University, Shenzhen, China","institution_ids":["https://openalex.org/I180726961","https://openalex.org/I150807315"]},{"raw_affiliation_string":"Guangxi University, China","institution_ids":["https://openalex.org/I150807315"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Peizhi Zhao","orcid":"https://orcid.org/0009-0005-0853-5880"},"institutions":[{"id":"https://openalex.org/I150807315","display_name":"Guangxi University","ror":"https://ror.org/02c9qn167","country_code":"CN","type":"education","lineage":["https://openalex.org/I150807315"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Peizhi Zhao","raw_affiliation_strings":["School of Electrical Engineering, Guangxi University, Nanning, China","Guangxi University, China"],"raw_orcid":"https://orcid.org/0009-0005-0853-5880","affiliations":[{"raw_affiliation_string":"School of Electrical Engineering, Guangxi University, Nanning, China","institution_ids":["https://openalex.org/I150807315"]},{"raw_affiliation_string":"Guangxi University, China","institution_ids":["https://openalex.org/I150807315"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061381102","display_name":"Qingbao Huang","orcid":"https://orcid.org/0000-0001-7691-347X"},"institutions":[{"id":"https://openalex.org/I150807315","display_name":"Guangxi University","ror":"https://ror.org/02c9qn167","country_code":"CN","type":"education","lineage":["https://openalex.org/I150807315"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qingbao Huang","raw_affiliation_strings":["School of Electrical Engineering, Guangxi University, Nanning, China, and Guangxi Key Laboratory of Multimedia Communications and Network Technology, Nanning, China","Guangxi University, China"],"raw_orcid":"https://orcid.org/0000-0001-7691-347X","affiliations":[{"raw_affiliation_string":"School of Electrical Engineering, Guangxi University, Nanning, China, and Guangxi Key Laboratory of Multimedia Communications and Network Technology, Nanning, China","institution_ids":["https://openalex.org/I150807315"]},{"raw_affiliation_string":"Guangxi University, China","institution_ids":["https://openalex.org/I150807315"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100953440","display_name":"Yi Cai","orcid":"https://orcid.org/0009-0004-0521-4021"},"institutions":[{"id":"https://openalex.org/I4210110997","display_name":"Ministry of Education","ror":"https://ror.org/01xexqx38","country_code":"ME","type":"government","lineage":["https://openalex.org/I4210110997"]},{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN","ME"],"is_corresponding":false,"raw_author_name":"Yi Cai","raw_affiliation_strings":["School of Software Engineering, South China University of Technology, Guangzhou, China and Key Laboratory of Big Dat and Intelligent Robot (SCUT), Ministry of Education, China","South China University of Technology, China and Key Laboratory of Big Dat and Intelligent Robot (SCUT), Ministry of Education, China"],"raw_orcid":"https://orcid.org/0009-0004-0521-4021","affiliations":[{"raw_affiliation_string":"School of Software Engineering, South China University of Technology, Guangzhou, China and Key Laboratory of Big Dat and Intelligent Robot (SCUT), Ministry of Education, China","institution_ids":["https://openalex.org/I90610280"]},{"raw_affiliation_string":"South China University of Technology, China and Key Laboratory of Big Dat and Intelligent Robot (SCUT), Ministry of Education, China","institution_ids":["https://openalex.org/I4210110997"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065017752","display_name":"Haonan Cheng","orcid":"https://orcid.org/0000-0003-3407-4318"},"institutions":[{"id":"https://openalex.org/I75689368","display_name":"Communication University of China","ror":"https://ror.org/04facbs33","country_code":"CN","type":"education","lineage":["https://openalex.org/I75689368"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haonan Cheng","raw_affiliation_strings":["The State Key Laboratory of Media Convergence and Communication, Communication University of China, Beijing, China","the State Key Laboratory of Media Convergence and Communication, China"],"raw_orcid":"https://orcid.org/0000-0003-3407-4318","affiliations":[{"raw_affiliation_string":"The State Key Laboratory of Media Convergence and Communication, Communication University of China, Beijing, China","institution_ids":["https://openalex.org/I75689368"]},{"raw_affiliation_string":"the State Key Laboratory of Media Convergence and Communication, China","institution_ids":["https://openalex.org/I75689368"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5060958969","display_name":"Qi Wu","orcid":"https://orcid.org/0000-0003-3631-256X"},"institutions":[{"id":"https://openalex.org/I5681781","display_name":"The University of Adelaide","ror":"https://ror.org/00892tw58","country_code":"AU","type":"education","lineage":["https://openalex.org/I5681781"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Qi Wu","raw_affiliation_strings":["The University of Adelaide, Adelaide, Australia","The University of Adelaide, Australia"],"raw_orcid":"https://orcid.org/0000-0003-3631-256X","affiliations":[{"raw_affiliation_string":"The University of Adelaide, Adelaide, Australia","institution_ids":["https://openalex.org/I5681781"]},{"raw_affiliation_string":"The University of Adelaide, Australia","institution_ids":["https://openalex.org/I5681781"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.9071,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.80897225,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":"22","issue":"2","first_page":"1","last_page":"24"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8458999991416931,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8458999991416931,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.04100000113248825,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.0210999995470047,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/comprehension","display_name":"Comprehension","score":0.7602999806404114},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.640999972820282},{"id":"https://openalex.org/keywords/point","display_name":"Point (geometry)","score":0.5138000249862671},{"id":"https://openalex.org/keywords/expression","display_name":"Expression (computer science)","score":0.5116999745368958},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.5055999755859375},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.4553000032901764},{"id":"https://openalex.org/keywords/binary-number","display_name":"Binary number","score":0.44920000433921814},{"id":"https://openalex.org/keywords/lens","display_name":"Lens (geology)","score":0.43630000948905945},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.4230000078678131}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8689000010490417},{"id":"https://openalex.org/C511192102","wikidata":"https://www.wikidata.org/wiki/Q5156948","display_name":"Comprehension","level":2,"score":0.7602999806404114},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.640999972820282},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5997999906539917},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.5138000249862671},{"id":"https://openalex.org/C90559484","wikidata":"https://www.wikidata.org/wiki/Q778379","display_name":"Expression (computer science)","level":2,"score":0.5116999745368958},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.5055999755859375},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5034999847412109},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.4553000032901764},{"id":"https://openalex.org/C48372109","wikidata":"https://www.wikidata.org/wiki/Q3913","display_name":"Binary number","level":2,"score":0.44920000433921814},{"id":"https://openalex.org/C15336307","wikidata":"https://www.wikidata.org/wiki/Q1766051","display_name":"Lens (geology)","level":2,"score":0.43630000948905945},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.4230000078678131},{"id":"https://openalex.org/C2777561058","wikidata":"https://www.wikidata.org/wiki/Q2652119","display_name":"Program comprehension","level":4,"score":0.383899986743927},{"id":"https://openalex.org/C63435697","wikidata":"https://www.wikidata.org/wiki/Q864135","display_name":"Binary code","level":3,"score":0.3779999911785126},{"id":"https://openalex.org/C2775941552","wikidata":"https://www.wikidata.org/wiki/Q25212305","display_name":"Isolation (microbiology)","level":2,"score":0.3682999908924103},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.3571000099182129},{"id":"https://openalex.org/C200220432","wikidata":"https://www.wikidata.org/wiki/Q7936208","display_name":"Vision science","level":2,"score":0.3504999876022339},{"id":"https://openalex.org/C5339829","wikidata":"https://www.wikidata.org/wiki/Q1425977","display_name":"Machine vision","level":2,"score":0.3495999872684479},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.34389999508857727},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.3244999945163727},{"id":"https://openalex.org/C43091099","wikidata":"https://www.wikidata.org/wiki/Q1067788","display_name":"Through-the-lens metering","level":3,"score":0.31869998574256897},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3084999918937683},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2996000051498413},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.289000004529953},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.2840000092983246},{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.25870001316070557},{"id":"https://openalex.org/C83665646","wikidata":"https://www.wikidata.org/wiki/Q42139305","display_name":"Feature vector","level":2,"score":0.25679999589920044}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3777449","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3777449","pdf_url":null,"source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4042899380","display_name":null,"funder_award_id":"SKLMCC2023KF005","funder_id":"https://openalex.org/F4320329139","funder_display_name":"Communication University of China"},{"id":"https://openalex.org/G6770297356","display_name":null,"funder_award_id":"62276072","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320329139","display_name":"Communication University of China","ror":"https://ror.org/04facbs33"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W1903029394","https://openalex.org/W2251512949","https://openalex.org/W2904910963","https://openalex.org/W2946086442","https://openalex.org/W2963323244","https://openalex.org/W2964121718","https://openalex.org/W2984121207","https://openalex.org/W2987734933","https://openalex.org/W3034772468","https://openalex.org/W3035396860","https://openalex.org/W3087365719","https://openalex.org/W3112077297","https://openalex.org/W3138516171","https://openalex.org/W3174965650","https://openalex.org/W3206582857","https://openalex.org/W4205421564","https://openalex.org/W4283029876","https://openalex.org/W4307504011","https://openalex.org/W4313054169","https://openalex.org/W4384820618","https://openalex.org/W4386065815","https://openalex.org/W4398781982","https://openalex.org/W4404893071","https://openalex.org/W4410884675"],"related_works":[],"abstract_inverted_index":{"Referring":[0],"Expression":[1],"Comprehension":[2],"(REC)":[3],"aims":[4],"to":[5,27,54],"achieve":[6,56],"fine-grained":[7],"cross-modal":[8,77,104],"content":[9],"alignment.":[10],"The":[11],"traditional":[12],"two-stage":[13,50],"approaches,":[14],"by":[15,58],"decomposing":[16],"REC":[17,53,72,110],"into":[18,64,92],"localization":[19,57,149],"(region":[20],"proposal)":[21],"and":[22,34,79,150,162],"comprehension":[23,78,85],"(expression-based":[24],"ranking),":[25],"lead":[26],"the":[28,38,41,65,84,89,96,122,160],"isolation":[29],"of":[30,40,103,124,146,164],"continuous":[31],"image":[32],"information":[33],"heavily":[35],"rely":[36],"on":[37,156],"quality":[39],"proposals.":[42],"In":[43],"this":[44,107],"article,":[45],"we":[46,70,87],"propose":[47],"a":[48,60,101,115],"point-based":[49,76,80,166],"framework":[51],"for":[52],"quickly":[55],"inserting":[59],"language-modulated":[61],"auto-focus":[62],"module":[63],"locked":[66],"vision":[67,142],"model.":[68],"Specifically,":[69],"redefine":[71],"as":[73,100,114],"two":[74],"processes:":[75],"instance":[81],"localization.":[82],"For":[83],"stage,":[86],"reconstruct":[88],"raw":[90],"annotations":[91],"soft":[93,128],"masks":[94,129],"at":[95,173],"feature":[97],"point":[98],"level":[99],"metric":[102],"correlation.":[105],"With":[106],"indirect":[108],"metric,":[109],"can":[111,152],"be":[112,153,171],"approximated":[113],"binary":[116],"classification":[117],"problem,":[118],"which":[119,132],"fundamentally":[120],"avoids":[121],"impact":[123],"isolated":[125],"regions.":[126],"Remarkably,":[127],"are":[130],"shape-independent,":[131],"means":[133],"our":[134,165],"method":[135],"is":[136],"extremely":[137],"general.":[138],"By":[139],"switching":[140],"different":[141,144],"models,":[143],"types":[145],"predictions":[147],"(e.g.,":[148],"segmentation)":[151],"obtained.":[154],"Experiments":[155],"multiple":[157],"benchmarks":[158],"demonstrate":[159],"feasibility":[161],"potential":[163],"paradigm.":[167],"Our":[168],"code":[169],"will":[170],"public":[172],"https://github.com/VILAN-Lab/PBREC-AF":[174],".":[175]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-06-17T08:01:34.144755","created_date":"2025-11-28T00:00:00"}
