{"id":"https://openalex.org/W6906384742","doi":"https://doi.org/10.15496/publikation-76837","title":"An Analysis of the Inner Workings of Variational Autoencoders","display_name":"An Analysis of the Inner Workings of Variational Autoencoders","publication_year":2023,"publication_date":"2023-01-18","ids":{"openalex":"https://openalex.org/W6906384742","doi":"https://doi.org/10.15496/publikation-76837"},"language":"en","primary_location":{"id":"doi:10.15496/publikation-76837","is_oa":true,"landing_page_url":"https://doi.org/10.15496/publikation-76837","pdf_url":null,"source":{"id":"https://openalex.org/S7407053000","display_name":"Universit\u00e4tsbibliothek T\u00fcbingen","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"other","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.15496/publikation-76837","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Zietlow, Urs Dominik","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zietlow, Urs Dominik","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":null,"topics":[],"keywords":[{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.6190999746322632},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.6172000169754028},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.6069999933242798},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5623000264167786},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.5608000159263611},{"id":"https://openalex.org/keywords/rendering","display_name":"Rendering (computer graphics)","score":0.5138999819755554},{"id":"https://openalex.org/keywords/property","display_name":"Property (philosophy)","score":0.4925000071525574},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.4666999876499176}],"concepts":[{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6732000112533569},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6352999806404114},{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.6190999746322632},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.6172000169754028},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.6069999933242798},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5623000264167786},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.5608000159263611},{"id":"https://openalex.org/C205711294","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Rendering (computer graphics)","level":2,"score":0.5138999819755554},{"id":"https://openalex.org/C189950617","wikidata":"https://www.wikidata.org/wiki/Q937228","display_name":"Property (philosophy)","level":2,"score":0.4925000071525574},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.4666999876499176},{"id":"https://openalex.org/C190470478","wikidata":"https://www.wikidata.org/wiki/Q2370229","display_name":"Invariant (physics)","level":2,"score":0.4074000120162964},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.3813000023365021},{"id":"https://openalex.org/C116409475","wikidata":"https://www.wikidata.org/wiki/Q1385056","display_name":"External Data Representation","level":2,"score":0.35100001096725464},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.33489999175071716},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.3124000132083893},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.31060001254081726},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.27869999408721924},{"id":"https://openalex.org/C2779231336","wikidata":"https://www.wikidata.org/wiki/Q7534724","display_name":"Sketch","level":2,"score":0.27639999985694885},{"id":"https://openalex.org/C2780719617","wikidata":"https://www.wikidata.org/wiki/Q1030752","display_name":"Salient","level":2,"score":0.2750999927520752},{"id":"https://openalex.org/C161301231","wikidata":"https://www.wikidata.org/wiki/Q3478658","display_name":"Knowledge representation and reasoning","level":2,"score":0.2694000005722046},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.26489999890327454},{"id":"https://openalex.org/C167528306","wikidata":"https://www.wikidata.org/wiki/Q5508776","display_name":"Function representation","level":3,"score":0.2646999955177307},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.2529999911785126}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.15496/publikation-76837","is_oa":true,"landing_page_url":"https://doi.org/10.15496/publikation-76837","pdf_url":null,"source":{"id":"https://openalex.org/S7407053000","display_name":"Universit\u00e4tsbibliothek T\u00fcbingen","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.15496/publikation-76837","is_oa":true,"landing_page_url":"https://doi.org/10.15496/publikation-76837","pdf_url":null,"source":{"id":"https://openalex.org/S7407053000","display_name":"Universit\u00e4tsbibliothek T\u00fcbingen","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Representation":[0],"learning,":[1,192],"the":[2,13,48,75,80,113,121,127,167,213,231,243,264,267,302,308],"task":[3],"of":[4,8,16,25,30,53,62,74,88,99,107,115,118,129,155,169,199,207,234,242,271,311],"extracting":[5],"meaningful":[6,55],"representations":[7,90,101,265,281,299],"high-dimensional":[9,146],"data,":[10],"lies":[11],"at":[12,212,278],"very":[14],"core":[15],"artificial":[17],"intelligence":[18],"research.":[19],"Be":[20],"it":[21],"via":[22],"implicit":[23],"training":[24],"features":[26],"in":[27,120,126,140],"a":[28,63,66,72,91,108,141,149,171,184,205,235,248,259,285,294],"variety":[29],"computer":[31],"vision":[32],"tasks,":[33],"over":[34,86,263],"more":[35],"old-school,":[36],"hand-crafted":[37],"feature":[38],"extraction":[39],"mechanisms":[40],"for,":[41],"e.g.,":[42],"eye-tracking":[43],"or":[44],"other":[45],"applications,":[46],"all":[47],"way":[49],"to":[50,175,210,240],"explicit":[51,84],"learning":[52,225,314],"semantically":[54],"data":[56,122,136,147],"representations.":[57,227],"Strictly":[58],"speaking,":[59],"any":[60],"activation":[61],"layer":[64],"within":[65],"neural":[67],"network":[68],"can":[69,251],"be":[70],"considered":[71],"representation":[73,110,151,173,191,236,250,313],"input":[76],"data.":[77],"This":[78,288],"makes":[79],"research":[81],"about":[82],"achieving":[83],"control":[85],"properties":[87],"such":[89,160],"fundamentally":[92],"attractive":[93],"task.":[94],"An":[95],"often":[96],"desired":[97],"property":[98,233],"learned":[100,244,249],"is":[102,174,183,237,273,305],"called":[103],"disentanglement.":[104],"The":[105,179],"idea":[106],"disentangled":[109,172,190,226,280],"stems":[111],"from":[112],"goal":[114,168],"separating":[116],"sources":[117],"variance":[119],"and":[123,193,221,253,300],"consolidates":[124],"itself":[125],"concept":[128],"recovering":[130],"generative":[131,142,219,309],"factors.":[132],"Assuming":[133],"that":[134,144,230],"every":[135],"has":[137],"its":[138,200,255],"origin":[139],"process":[143],"produces":[145],"given":[148,157],"low-dimensional":[150],"(e.g.,":[152],"rendering":[153],"images":[154],"people":[156],"visual":[158],"attributes,":[159],"as":[161,217,284],"hairstyle,":[162],"camera":[163],"angle,":[164],"age,":[165],"...),":[166],"finding":[170],"recover":[176],"those":[177],"attributes.":[178],"Variational":[180],"Autoencoder":[181],"(VAE)":[182],"famous":[185],"architecture":[186],"commonly":[187],"used":[188],"for":[189,224,297],"this":[194],"work":[195],"summarizes":[196],"an":[197],"analysis":[198],"inner":[201],"workings.":[202],"VAEs":[203,272,292],"achieved":[204],"lot":[206],"attention":[208],"due":[209],"their,":[211],"time,":[214],"unparalleled":[215],"performance":[216],"both":[218],"models":[220,223],"inference":[222],"However,":[228],"note":[229],"disentanglement":[232,256],"not":[238],"invariant":[239],"rotations":[241],"representation,":[245],"i.e.,":[246],"rotating":[247],"change":[252],"destroy":[254],"quality.":[257],"Given":[258],"rotationally":[260,274],"symmetric":[261],"prior":[262],"space,":[266],"idealized":[268],"objective":[269],"function":[270],"symmetric.":[275],"Their":[276],"success":[277],"producing":[279],"consequently":[282],"comes":[283],"particular":[286,295],"surprise.":[287],"thesis":[289],"discusses":[290],"why":[291],"pursue":[293],"alignment":[296,304],"their":[298],"how":[301],"chosen":[303],"correlated":[306],"with":[307],"factors":[310],"existing":[312],"datasets.":[315]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
