{"id":"https://openalex.org/W7137911471","doi":"https://doi.org/10.1609/aaai.v40i31.39849","title":"Self-Supervised Learning Based on Transformed Image Reconstruction for Equivariance-Coherent Feature Representation","display_name":"Self-Supervised Learning Based on Transformed Image Reconstruction for Equivariance-Coherent Feature Representation","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7137911471","doi":"https://doi.org/10.1609/aaai.v40i31.39849"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i31.39849","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i31.39849","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1609/aaai.v40i31.39849","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Qin Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Qin Wang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Alessio Quercia","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Alessio Quercia","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Benjamin Bruns","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Benjamin Bruns","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Abigail Morrison","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Abigail Morrison","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Hanno Scharr","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hanno Scharr","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Kai Krajsek","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kai Krajsek","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":110.4706,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.99413958,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":"40","issue":"31","first_page":"26425","last_page":"26434"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.8138999938964844,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.8138999938964844,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.040800001472234726,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.015399999916553497,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.5961999893188477},{"id":"https://openalex.org/keywords/equivariant-map","display_name":"Equivariant map","score":0.5928999781608582},{"id":"https://openalex.org/keywords/invariant","display_name":"Invariant (physics)","score":0.588100016117096},{"id":"https://openalex.org/keywords/transformation","display_name":"Transformation (genetics)","score":0.5877000093460083},{"id":"https://openalex.org/keywords/rigid-transformation","display_name":"Rigid transformation","score":0.5023999810218811},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4894999861717224},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.46869999170303345},{"id":"https://openalex.org/keywords/feature-vector","display_name":"Feature vector","score":0.41269999742507935}],"concepts":[{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.652400016784668},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6295999884605408},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.5961999893188477},{"id":"https://openalex.org/C171036898","wikidata":"https://www.wikidata.org/wiki/Q256355","display_name":"Equivariant map","level":2,"score":0.5928999781608582},{"id":"https://openalex.org/C190470478","wikidata":"https://www.wikidata.org/wiki/Q2370229","display_name":"Invariant (physics)","level":2,"score":0.588100016117096},{"id":"https://openalex.org/C204241405","wikidata":"https://www.wikidata.org/wiki/Q461499","display_name":"Transformation (genetics)","level":3,"score":0.5877000093460083},{"id":"https://openalex.org/C126795593","wikidata":"https://www.wikidata.org/wiki/Q7333813","display_name":"Rigid transformation","level":2,"score":0.5023999810218811},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.49729999899864197},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4894999861717224},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.46869999170303345},{"id":"https://openalex.org/C83665646","wikidata":"https://www.wikidata.org/wiki/Q42139305","display_name":"Feature vector","level":2,"score":0.41269999742507935},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.4083000123500824},{"id":"https://openalex.org/C49766605","wikidata":"https://www.wikidata.org/wiki/Q207643","display_name":"Linear map","level":2,"score":0.39320001006126404},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.39239999651908875},{"id":"https://openalex.org/C141379421","wikidata":"https://www.wikidata.org/wiki/Q6094427","display_name":"Iterative reconstruction","level":2,"score":0.34279999136924744},{"id":"https://openalex.org/C74050887","wikidata":"https://www.wikidata.org/wiki/Q848368","display_name":"Rotation (mathematics)","level":2,"score":0.32710000872612},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.3183000087738037},{"id":"https://openalex.org/C2780598303","wikidata":"https://www.wikidata.org/wiki/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.3075999915599823},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.2759999930858612},{"id":"https://openalex.org/C9417928","wikidata":"https://www.wikidata.org/wiki/Q1070689","display_name":"Image processing","level":3,"score":0.275299996137619},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.26429998874664307},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2587999999523163},{"id":"https://openalex.org/C56435381","wikidata":"https://www.wikidata.org/wiki/Q1196371","display_name":"Geometric transformation","level":3,"score":0.25519999861717224}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i31.39849","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i31.39849","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i31.39849","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i31.39849","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Self-supervised":[0],"learning":[1,9,38],"(SSL)":[2],"methods":[3,193,221],"have":[4],"achieved":[5],"remarkable":[6],"success":[7],"in":[8,14,44,133],"image":[10,66],"representations":[11,83],"allowing":[12],"invariances":[13],"them":[15,159],"\u2014":[16],"but":[17],"therefore":[18,140],"discarding":[19],"transformation":[20,63,86,109,130,146],"information":[21,131],"that":[22,51,80],"some":[23],"computer":[24],"vision":[25],"tasks":[26,182],"actually":[27],"require.":[28],"While":[29],"recent":[30],"approaches":[31],"attempt":[32],"to":[33,102],"address":[34],"this":[35],"limitation":[36],"by":[37],"equivariant":[39,156,223],"features":[40,142],"using":[41],"linear":[42],"operators":[43],"feature":[45,68,151],"space,":[46],"they":[47],"impose":[48],"restrictive":[49],"assumptions":[50],"constrain":[52],"flexibility":[53],"and":[54,67,121,139,155,164,196,208],"generalization.":[55],"We":[56,73,168],"introduce":[57],"a":[58,75,215],"weaker":[59],"definition":[60],"for":[61,218],"the":[62,119,129,144],"relation":[64],"between":[65],"space":[69],"denoted":[70],"as":[71],"equivariance-coherence.":[72],"propose":[74],"novel":[76],"SSL":[77,96,162,192,220],"auxillary":[78],"task":[79],"learns":[81],"equivariance-coherent":[82],"through":[84],"intermediate":[85,106,126],"reconstruction,":[87],"which":[88],"can":[89],"be":[90],"integrated":[91],"with":[92,160,190,222],"existing":[93,191],"joint":[94],"embedding":[95],"methods.":[97],"Our":[98,148,212],"key":[99],"idea":[100],"is":[101],"reconstruct":[103,118],"images":[104],"at":[105],"points":[107],"along":[108],"paths,":[110],"e.g.":[111],"when":[112],"training":[113,158],"on":[114,172,180],"30\u00b0":[115],"rotations,":[116],"we":[117],"10\u00b0":[120],"20\u00b0":[122],"rotation":[123],"states.":[124],"Reconstructing":[125],"states":[127],"requires":[128],"used":[132],"augmentations,":[134],"rather":[135],"than":[136],"suppressing":[137],"it,":[138],"fosters":[141],"containing":[143],"augmented":[145],"information.":[147],"method":[149],"decomposes":[150],"vectors":[152],"into":[153],"invariant":[154,184,227],"parts,":[157],"standard":[161],"losses":[163],"reconstruction":[165],"losses,":[166],"respectively.":[167],"demonstrate":[169],"substantial":[170],"improvements":[171],"synthetic":[173],"equivariance":[174],"benchmarks":[175],"while":[176,225],"maintaining":[177],"competitive":[178],"performance":[179,199],"downstream":[181],"requiring":[183],"representations.":[185],"The":[186],"approach":[187],"seamlessly":[188],"integrates":[189],"(iBOT,":[194],"DINOv2)":[195],"consistently":[197],"enhances":[198],"across":[200],"diverse":[201],"tasks,":[202],"including":[203],"segmentation,":[204],"detection,":[205],"depth":[206],"estimation,":[207],"video":[209],"dense":[210],"prediction.":[211],"framework":[213],"provides":[214],"practical":[216],"way":[217],"augmenting":[219],"capabilities":[224],"preserving":[226],"performance.":[228]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-04-06T07:47:59.780226","created_date":"2026-02-13T00:00:00"}
