{"id":"https://openalex.org/W4408848666","doi":"https://doi.org/10.1145/3712676.3714447","title":"SAMPL: Self-Attention Modelled Patch Learning for Efficient Visual Understanding","display_name":"SAMPL: Self-Attention Modelled Patch Learning for Efficient Visual Understanding","publication_year":2025,"publication_date":"2025-03-26","ids":{"openalex":"https://openalex.org/W4408848666","doi":"https://doi.org/10.1145/3712676.3714447"},"language":"en","primary_location":{"id":"doi:10.1145/3712676.3714447","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3712676.3714447","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 16th ACM Multimedia Systems Conference","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102825299","display_name":"Zhiming Hu","orcid":"https://orcid.org/0000-0002-5465-2819"},"institutions":[{"id":"https://openalex.org/I4210123574","display_name":"Centre for Social Innovation","ror":"https://ror.org/03trz9s27","country_code":"CA","type":"nonprofit","lineage":["https://openalex.org/I4210123574"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Zhiming Hu","raw_affiliation_strings":["Samsung AI Center, Toronto, Canada"],"raw_orcid":"https://orcid.org/0000-0002-5465-2819","affiliations":[{"raw_affiliation_string":"Samsung AI Center, Toronto, Canada","institution_ids":["https://openalex.org/I4210123574"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064063862","display_name":"Salar Hosseini Khorasgani","orcid":"https://orcid.org/0000-0001-6020-3406"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Salar Hosseini Khorasgani","raw_affiliation_strings":["Tenstorrent, Toronto, Canada"],"raw_orcid":"https://orcid.org/0000-0001-6020-3406","affiliations":[{"raw_affiliation_string":"Tenstorrent, Toronto, Canada","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065830291","display_name":"Weiming Ren","orcid":null},"institutions":[{"id":"https://openalex.org/I4210123574","display_name":"Centre for Social Innovation","ror":"https://ror.org/03trz9s27","country_code":"CA","type":"nonprofit","lineage":["https://openalex.org/I4210123574"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Weiming Ren","raw_affiliation_strings":["Samsung AI Center, Toronto, Canada"],"raw_orcid":"https://orcid.org/0009-0000-1519-6710","affiliations":[{"raw_affiliation_string":"Samsung AI Center, Toronto, Canada","institution_ids":["https://openalex.org/I4210123574"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5025164458","display_name":"Iqbal Mohomed","orcid":"https://orcid.org/0009-0008-0598-8966"},"institutions":[{"id":"https://openalex.org/I4210123574","display_name":"Centre for Social Innovation","ror":"https://ror.org/03trz9s27","country_code":"CA","type":"nonprofit","lineage":["https://openalex.org/I4210123574"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Iqbal Mohomed","raw_affiliation_strings":["Samsung AI Center, Toronto, Canada"],"raw_orcid":"https://orcid.org/0009-0008-0598-8966","affiliations":[{"raw_affiliation_string":"Samsung AI Center, Toronto, Canada","institution_ids":["https://openalex.org/I4210123574"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5102825299"],"corresponding_institution_ids":["https://openalex.org/I4210123574"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.04103161,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"159","last_page":"169"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7462713718414307},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.40928998589515686},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.40881669521331787},{"id":"https://openalex.org/keywords/cognitive-psychology","display_name":"Cognitive psychology","score":0.35244160890579224},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.1702231764793396}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7462713718414307},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.40928998589515686},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.40881669521331787},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.35244160890579224},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.1702231764793396}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3712676.3714447","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3712676.3714447","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 16th ACM Multimedia Systems Conference","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W596522316","https://openalex.org/W1973264045","https://openalex.org/W2108598243","https://openalex.org/W2889469641","https://openalex.org/W2963091558","https://openalex.org/W2990503944","https://openalex.org/W3034572008","https://openalex.org/W3035296770","https://openalex.org/W3116489684","https://openalex.org/W3159481202","https://openalex.org/W3164024107","https://openalex.org/W3168124404","https://openalex.org/W3170874841","https://openalex.org/W3174402370","https://openalex.org/W3177017840","https://openalex.org/W3188427387","https://openalex.org/W3216553632","https://openalex.org/W4214614183","https://openalex.org/W4221167396","https://openalex.org/W4226407477","https://openalex.org/W4280490805","https://openalex.org/W4288055492","https://openalex.org/W4312340826","https://openalex.org/W4312947882","https://openalex.org/W4386076203"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"We":[0],"study":[1],"the":[2,40,43,64,78,82,98,107,126,147,158,163,170,188],"patch":[3,119,140,166],"selection":[4,167],"problem":[5],"for":[6,139],"efficient":[7],"transformer-based":[8,103,148],"visual":[9],"understanding":[10],"wherein":[11],"a":[12,60,86,117,135,152],"sampler":[13],"can":[14,185],"be":[15],"used":[16],"to":[17,26,49,133,161],"drop":[18,72],"less":[19],"informative":[20],"patches":[21,57,76],"at":[22,191],"inference":[23],"in":[24,101,110],"order":[25],"speed":[27],"up":[28],"model":[29],"execution":[30],"on":[31,39,157,175],"resource-constrained":[32],"devices.":[33],"As":[34],"no":[35],"labels":[36],"are":[37],"available":[38],"saliency":[41],"of":[42,54,85,165],"patches,":[44],"existing":[45],"works":[46],"either":[47],"try":[48],"solve":[50],"an":[51],"auxiliary":[52],"task":[53],"locating":[55],"distinctive":[56],"or":[58],"learn":[59,134],"policy":[61,137],"network":[62,138],"through":[63],"global":[65],"image/video-level":[66],"supervision.":[67],"The":[68],"former":[69],"approach":[70],"could":[71],"redundant":[73],"but":[74],"important":[75],"while":[77,194],"latter":[79],"suffers":[80],"from":[81],"weak":[83],"supervision":[84,132],"single":[87],"class":[88],"label":[89],"per":[90],"image/video.":[91],"In":[92],"this":[93],"work,":[94],"we":[95,115,150],"observe":[96],"that":[97,124,183],"attention":[99,127,171],"weights":[100,128],"trained":[102],"models":[104],"clearly":[105],"highlight":[106],"salient":[108],"regions":[109],"images":[111],"and":[112,169,180],"videos.":[113],"Therefore,":[114],"propose":[116],"learned":[118],"sampling":[120],"framework":[121],"called":[122],"SAMPL":[123,144,184],"utilizes":[125],"as":[129],"fine-grained":[130],"patch-level":[131],"lightweight":[136],"selection.":[141],"To":[142],"train":[143],"end-to-end":[145],"with":[146],"models,":[149],"introduce":[151],"new":[153],"loss":[154],"function":[155],"based":[156],"REINFORCE":[159],"algorithm":[160],"match":[162],"distribution":[164],"probabilities":[168],"scores.":[172],"Experimental":[173],"results":[174],"ImageNet,":[176],"UCF101,":[177],"Something-Something":[178],"v2":[179],"Kinetics-400":[181],"show":[182],"effectively":[186],"increase":[187],"throughput":[189],"by":[190],"least":[192],"1.5\u00d7":[193],"achieving":[195],"competitive":[196],"classification":[197],"accuracy.":[198]},"counts_by_year":[],"updated_date":"2025-12-28T23:10:05.387466","created_date":"2025-10-10T00:00:00"}
