{"id":"https://openalex.org/W4402667901","doi":"https://doi.org/10.1109/cvpr52733.2024.00309","title":"AlignSAM: Aligning Segment Anything Model to Open Context via Reinforcement Learning","display_name":"AlignSAM: Aligning Segment Anything Model to Open Context via Reinforcement Learning","publication_year":2024,"publication_date":"2024-06-16","ids":{"openalex":"https://openalex.org/W4402667901","doi":"https://doi.org/10.1109/cvpr52733.2024.00309"},"language":"en","primary_location":{"id":"doi:10.1109/cvpr52733.2024.00309","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cvpr52733.2024.00309","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5111028333","display_name":"Duojun Huang","orcid":null},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Duojun Huang","raw_affiliation_strings":["School of Computer Science and Engineering, Sun Yat-sen University,Guangzhou,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Sun Yat-sen University,Guangzhou,China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102762676","display_name":"Xinyu Xiong","orcid":"https://orcid.org/0000-0001-9211-3165"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xinyu Xiong","raw_affiliation_strings":["School of Computer Science and Engineering, Sun Yat-sen University,Guangzhou,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Sun Yat-sen University,Guangzhou,China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100676363","display_name":"Jie Ma","orcid":"https://orcid.org/0000-0002-7570-9554"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jie Ma","raw_affiliation_strings":["School of Computer Science and Engineering, Sun Yat-sen University,Guangzhou,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Sun Yat-sen University,Guangzhou,China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043528779","display_name":"Jichang Li","orcid":"https://orcid.org/0000-0002-1779-1190"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jichang Li","raw_affiliation_strings":["School of Computer Science and Engineering, Sun Yat-sen University,Guangzhou,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Sun Yat-sen University,Guangzhou,China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075329194","display_name":"Zequn Jie","orcid":"https://orcid.org/0000-0002-3038-5891"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zequn Jie","raw_affiliation_strings":["Meituan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Meituan","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100462142","display_name":"Lin Ma","orcid":"https://orcid.org/0009-0005-2568-2735"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lin Ma","raw_affiliation_strings":["Meituan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Meituan","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5042965510","display_name":"Guanbin Li","orcid":"https://orcid.org/0000-0002-4805-0926"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guanbin Li","raw_affiliation_strings":["School of Computer Science and Engineering, Sun Yat-sen University,Guangzhou,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Sun Yat-sen University,Guangzhou,China","institution_ids":["https://openalex.org/I157773358"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5111028333"],"corresponding_institution_ids":["https://openalex.org/I157773358"],"apc_list":null,"apc_paid":null,"fwci":8.9561,"has_fulltext":false,"cited_by_count":28,"citation_normalized_percentile":{"value":0.98269588,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"3205","last_page":"3215"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10273","display_name":"IoT and Edge/Fog Computing","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10273","display_name":"IoT and Edge/Fog Computing","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10444","display_name":"Context-Aware Activity Recognition Systems","score":0.9955000281333923,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11704","display_name":"Mobile Crowdsensing and Crowdsourcing","score":0.9919000267982483,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8468520045280457},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7058624625205994},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.6075953841209412},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3566173315048218},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.320021390914917},{"id":"https://openalex.org/keywords/geology","display_name":"Geology","score":0.05055660009384155}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8468520045280457},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7058624625205994},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.6075953841209412},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3566173315048218},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.320021390914917},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.05055660009384155},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cvpr52733.2024.00309","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cvpr52733.2024.00309","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Reduced inequalities","score":0.47999998927116394,"id":"https://metadata.un.org/sdg/10"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":84,"referenced_works":["https://openalex.org/W2031489346","https://openalex.org/W2054203848","https://openalex.org/W2100470808","https://openalex.org/W2194775991","https://openalex.org/W2206865673","https://openalex.org/W2340897893","https://openalex.org/W2411802338","https://openalex.org/W2519623608","https://openalex.org/W2584265939","https://openalex.org/W2736601468","https://openalex.org/W2740667773","https://openalex.org/W2787091153","https://openalex.org/W2944041622","https://openalex.org/W2948139120","https://openalex.org/W2948670693","https://openalex.org/W2948957608","https://openalex.org/W2951423754","https://openalex.org/W2951527505","https://openalex.org/W2963529609","https://openalex.org/W2969195612","https://openalex.org/W2983727866","https://openalex.org/W2990984982","https://openalex.org/W3006246324","https://openalex.org/W3035422681","https://openalex.org/W3110598968","https://openalex.org/W3120736405","https://openalex.org/W3127859904","https://openalex.org/W3132455321","https://openalex.org/W3153051248","https://openalex.org/W3176720610","https://openalex.org/W3204290641","https://openalex.org/W3209532394","https://openalex.org/W4226018689","https://openalex.org/W4226494801","https://openalex.org/W4306820534","https://openalex.org/W4312249250","https://openalex.org/W4312651322","https://openalex.org/W4312933868","https://openalex.org/W4362692272","https://openalex.org/W4365460740","https://openalex.org/W4365606129","https://openalex.org/W4367189325","https://openalex.org/W4367365477","https://openalex.org/W4372283850","https://openalex.org/W4377865974","https://openalex.org/W4383176792","https://openalex.org/W4386072314","https://openalex.org/W4386076112","https://openalex.org/W4386076597","https://openalex.org/W4386362790","https://openalex.org/W4387245372","https://openalex.org/W4387363540","https://openalex.org/W4387642453","https://openalex.org/W4387947074","https://openalex.org/W4389664825","https://openalex.org/W4390190100","https://openalex.org/W4390873795","https://openalex.org/W4390874136","https://openalex.org/W4390874575","https://openalex.org/W4393149358","https://openalex.org/W4402667884","https://openalex.org/W4402727672","https://openalex.org/W4402753686","https://openalex.org/W6637373629","https://openalex.org/W6682137061","https://openalex.org/W6714973392","https://openalex.org/W6733472783","https://openalex.org/W6738570108","https://openalex.org/W6741002519","https://openalex.org/W6759579507","https://openalex.org/W6772057793","https://openalex.org/W6790019176","https://openalex.org/W6791353385","https://openalex.org/W6796581206","https://openalex.org/W6802987763","https://openalex.org/W6846007759","https://openalex.org/W6851607685","https://openalex.org/W6851980744","https://openalex.org/W6852445233","https://openalex.org/W6852612167","https://openalex.org/W6852694211","https://openalex.org/W6854424100","https://openalex.org/W6858037043","https://openalex.org/W6859550509"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4306904969","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2138720691","https://openalex.org/W2376932109","https://openalex.org/W4362501864","https://openalex.org/W2001405890"],"abstract_inverted_index":{"Powered":[0],"by":[1,84,115],"massive":[2],"curated":[3],"training":[4],"data,":[5],"Segment":[6],"Any-thing":[7],"Model":[8],"(SAM)":[9],"has":[10],"demonstrated":[11],"its":[12,101],"impressive":[13],"generalization":[14],"capabilities":[15],"in":[16,153],"open-world":[17],"scenarios":[18],"with":[19,117],"the":[20,25,89,92,118,133,150,173,176],"guidance":[21],"of":[22,39,91,147,175],"prompts.":[23],"However,":[24],"vanilla":[26],"SAM":[27,75,93],"is":[28,47,141],"class-agnostic":[29],"and":[30,53,158],"heavily":[31],"relies":[32],"on":[33,163],"user-provided":[34],"prompts":[35,131],"to":[36,44,54,76,110,128,132,143],"segment":[37],"objects":[38],"interest.":[40],"Adapting":[41],"this":[42,60],"method":[43],"diverse":[45,96],"tasks":[46,98,155,167],"crucial":[48],"for":[49,70,73],"accurate":[50],"target":[51],"identification":[52],"avoid":[55],"suboptimal":[56],"segmentation":[57,113,166],"results.":[58],"In":[59],"paper,":[61],"we":[62],"propose":[63],"a":[64,107,123,137],"novel":[65],"framework,":[66],"termed":[67],"AlignSAM,":[68],"designed":[69],"automatic":[71],"prompting":[72,108],"aligning":[74],"an":[77,85],"open":[78],"context":[79],"through":[80],"reinforcement":[81,124],"learning.":[82],"Anchored":[83],"agent,":[86],"AlignSAM":[87,105,178],"enables":[88],"generality":[90],"model":[94],"across":[95],"downstream":[97],"while":[99],"keeping":[100],"parameters":[102],"frozen.":[103],"Specifically,":[104],"initiates":[106],"agent":[109],"iteratively":[111],"refine":[112],"predictions":[114],"interacting":[116],"foundational":[119,134],"model.":[120],"It":[121],"integrates":[122],"learning":[125],"policy":[126],"network":[127],"provide":[129,144],"informative":[130],"models.":[135],"Additionally,":[136],"semantic":[138],"recal-ibration":[139],"module":[140],"introduced":[142],"fine-grained":[145],"labels":[146],"prompts,":[148],"enhancing":[149],"model's":[151],"proficiency":[152],"handling":[154],"encompassing":[156],"explicit":[157],"implicit":[159],"semantics.":[160],"Experiments":[161],"conducted":[162],"various":[164],"challenging":[165],"among":[168],"existing":[169],"foundation":[170],"models":[171],"demonstrate":[172],"superiority":[174],"proposed":[177],"over":[179],"state-of-the-art":[180],"approaches.":[181],"Project":[182],"page:":[183],"https://github.com/Duojun-Huang/AIignSAM-CVPR2024.":[184]},"counts_by_year":[{"year":2026,"cited_by_count":5},{"year":2025,"cited_by_count":17},{"year":2024,"cited_by_count":6}],"updated_date":"2026-05-26T13:28:51.108037","created_date":"2025-10-10T00:00:00"}
