{"id":"https://openalex.org/W4399356551","doi":"https://doi.org/10.48550/arxiv.2406.00480","title":"AlignSAM: Aligning Segment Anything Model to Open Context via Reinforcement Learning","display_name":"AlignSAM: Aligning Segment Anything Model to Open Context via Reinforcement Learning","publication_year":2024,"publication_date":"2024-06-01","ids":{"openalex":"https://openalex.org/W4399356551","doi":"https://doi.org/10.48550/arxiv.2406.00480"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2406.00480","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2406.00480","pdf_url":"https://arxiv.org/pdf/2406.00480","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2406.00480","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5111028333","display_name":"Duojun Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Huang, Duojun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102762676","display_name":"Xinyu Xiong","orcid":"https://orcid.org/0000-0001-9211-3165"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiong, Xinyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101043659","display_name":"Jie Ma","orcid":"https://orcid.org/0009-0004-4431-3890"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Jie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101585865","display_name":"Jichang Li","orcid":"https://orcid.org/0000-0001-5778-2232"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Jichang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075329194","display_name":"Zequn Jie","orcid":"https://orcid.org/0000-0002-3038-5891"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jie, Zequn","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017116858","display_name":"Lin Ma","orcid":"https://orcid.org/0000-0002-7331-6132"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Lin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5042965510","display_name":"Guanbin Li","orcid":"https://orcid.org/0000-0002-4805-0926"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Guanbin","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5111028333"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10444","display_name":"Context-Aware Activity Recognition Systems","score":0.9049999713897705,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10444","display_name":"Context-Aware Activity Recognition Systems","score":0.9049999713897705,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8300015926361084},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5898197293281555},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5825170278549194},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.4327234625816345},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.41630274057388306},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.2142186164855957},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.11603140830993652},{"id":"https://openalex.org/keywords/social-psychology","display_name":"Social psychology","score":0.06839749217033386}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8300015926361084},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5898197293281555},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5825170278549194},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.4327234625816345},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.41630274057388306},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.2142186164855957},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.11603140830993652},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.06839749217033386},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2406.00480","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2406.00480","pdf_url":"https://arxiv.org/pdf/2406.00480","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2406.00480","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2406.00480","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2406.00480","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2406.00480","pdf_url":"https://arxiv.org/pdf/2406.00480","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4399356551.pdf","grobid_xml":"https://content.openalex.org/works/W4399356551.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2920061524","https://openalex.org/W4310083477","https://openalex.org/W2328553770","https://openalex.org/W1977959518","https://openalex.org/W2038908348","https://openalex.org/W2107890255","https://openalex.org/W2106552856","https://openalex.org/W2145821588"],"abstract_inverted_index":{"Powered":[0],"by":[1,85,116],"massive":[2],"curated":[3],"training":[4],"data,":[5],"Segment":[6],"Anything":[7],"Model":[8],"(SAM)":[9],"has":[10],"demonstrated":[11],"its":[12,102],"impressive":[13],"generalization":[14],"capabilities":[15],"in":[16,154],"open-world":[17],"scenarios":[18],"with":[19,118],"the":[20,25,90,93,119,134,151,174,177],"guidance":[21],"of":[22,40,92,148,176],"prompts.":[23],"However,":[24],"vanilla":[26],"SAM":[27,76,94],"is":[28,48,142],"class":[29],"agnostic":[30],"and":[31,54,159],"heavily":[32],"relies":[33],"on":[34,164],"user-provided":[35],"prompts":[36,132],"to":[37,45,55,77,111,129,133,144],"segment":[38],"objects":[39],"interest.":[41],"Adapting":[42],"this":[43,61],"method":[44],"diverse":[46,97],"tasks":[47,99,156,168],"crucial":[49],"for":[50,71,74],"accurate":[51],"target":[52],"identification":[53],"avoid":[56],"suboptimal":[57],"segmentation":[58,114,167],"results.":[59],"In":[60],"paper,":[62],"we":[63],"propose":[64],"a":[65,108,124,138],"novel":[66],"framework,":[67],"termed":[68],"AlignSAM,":[69],"designed":[70],"automatic":[72],"prompting":[73,109],"aligning":[75],"an":[78,86],"open":[79],"context":[80],"through":[81],"reinforcement":[82,125],"learning.":[83],"Anchored":[84],"agent,":[87],"AlignSAM":[88,106,179],"enables":[89],"generality":[91],"model":[95],"across":[96],"downstream":[98],"while":[100],"keeping":[101],"parameters":[103],"frozen.":[104],"Specifically,":[105],"initiates":[107],"agent":[110],"iteratively":[112],"refine":[113],"predictions":[115],"interacting":[117],"foundational":[120,135],"model.":[121],"It":[122],"integrates":[123],"learning":[126],"policy":[127],"network":[128],"provide":[130,145],"informative":[131],"models.":[136],"Additionally,":[137],"semantic":[139],"recalibration":[140],"module":[141],"introduced":[143],"fine-grained":[146],"labels":[147],"prompts,":[149],"enhancing":[150],"model's":[152],"proficiency":[153],"handling":[155],"encompassing":[157],"explicit":[158],"implicit":[160],"semantics.":[161],"Experiments":[162],"conducted":[163],"various":[165],"challenging":[166],"among":[169],"existing":[170],"foundation":[171],"models":[172],"demonstrate":[173],"superiority":[175],"proposed":[178],"over":[180],"state-of-the-art":[181],"approaches.":[182],"Project":[183],"page:":[184],"\\url{https://github.com/Duojun-Huang/AlignSAM-CVPR2024}.":[185]},"counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2026-03-10T16:38:18.471706","created_date":"2024-06-06T00:00:00"}
