{"id":"https://openalex.org/W6945781521","doi":"https://doi.org/10.26083/tuprints-00026355","title":"Elements of Unsupervised Scene Understanding: Objectives, Structures, and Modalities","display_name":"Elements of Unsupervised Scene Understanding: Objectives, Structures, and Modalities","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W6945781521","doi":"https://doi.org/10.26083/tuprints-00026355"},"language":"en","primary_location":{"id":"pmh:oai:tubiblio.ulb.tu-darmstadt.de:141906","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4377196390","display_name":"TUbilio (Technical University of Darmstadt)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I31512782","host_organization_name":"Technische Universit\u00e4t Darmstadt","host_organization_lineage":["https://openalex.org/I31512782"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":"","raw_type":"Dissertation"},"type":"article","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://tuprints.ulb.tu-darmstadt.de/26355/1/dissertation_stelzner.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Stelzner, Karl","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Stelzner, Karl","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.29140986,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.43389999866485596,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.43389999866485596,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10719","display_name":"3D Shape Modeling and Analysis","score":0.25949999690055847,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.09709999710321426,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.6840999722480774},{"id":"https://openalex.org/keywords/encode","display_name":"ENCODE","score":0.6049000024795532},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.566100001335144},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5264000296592712},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.4864000082015991},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.48069998621940613},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.46399998664855957},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.45820000767707825},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.44920000433921814}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8015000224113464},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7354999780654907},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.6840999722480774},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.6049000024795532},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.566100001335144},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5264000296592712},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.4864000082015991},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.48069998621940613},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.46399998664855957},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.45820000767707825},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4569999873638153},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.44920000433921814},{"id":"https://openalex.org/C8038995","wikidata":"https://www.wikidata.org/wiki/Q1152135","display_name":"Unsupervised learning","level":2,"score":0.37940001487731934},{"id":"https://openalex.org/C2779321571","wikidata":"https://www.wikidata.org/wiki/Q7936605","display_name":"Visual learning","level":2,"score":0.36570000648498535},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.3531999886035919},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.33709999918937683},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.32760000228881836},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.322299987077713},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.3077999949455261},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.30489999055862427},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.298799991607666},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.2890999913215637},{"id":"https://openalex.org/C22367795","wikidata":"https://www.wikidata.org/wiki/Q7625208","display_name":"Structured prediction","level":2,"score":0.28349998593330383},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.28290000557899475},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.28040000796318054},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.26809999346733093}],"mesh":[],"locations_count":3,"locations":[{"id":"pmh:oai:tubiblio.ulb.tu-darmstadt.de:141906","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4377196390","display_name":"TUbilio (Technical University of Darmstadt)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I31512782","host_organization_name":"Technische Universit\u00e4t Darmstadt","host_organization_lineage":["https://openalex.org/I31512782"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":"","raw_type":"Dissertation"},{"id":"pmh:oai:tuprints.ulb.tu-darmstadt.de:26355","is_oa":true,"landing_page_url":"https://tuprints.ulb.tu-darmstadt.de/26355/1/dissertation_stelzner.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306401590","display_name":"Technischen Universit\u00e4t Darmstadt","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I31512782","host_organization_name":"Technische Universit\u00e4t Darmstadt","host_organization_lineage":["https://openalex.org/I31512782"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Ph.D. Thesis"},{"id":"doi:10.26083/tuprints-00026355","is_oa":true,"landing_page_url":"https://doi.org/10.26083/tuprints-00026355","pdf_url":null,"source":{"id":"https://openalex.org/S7407051655","display_name":"TUprints","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article-journal"}],"best_oa_location":{"id":"pmh:oai:tuprints.ulb.tu-darmstadt.de:26355","is_oa":true,"landing_page_url":"https://tuprints.ulb.tu-darmstadt.de/26355/1/dissertation_stelzner.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306401590","display_name":"Technischen Universit\u00e4t Darmstadt","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I31512782","host_organization_name":"Technische Universit\u00e4t Darmstadt","host_organization_lineage":["https://openalex.org/I31512782"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Ph.D. Thesis"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Enabling":[0],"robust":[1],"interactions":[2],"between":[3,276],"automated":[4],"systems":[5,135],"and":[6,44,77,195,227,233,246,257,280],"the":[7,26,37,41,60,72,75,78,115,131,145,154,184,191,214,240,265,274,281],"real":[8,209],"world":[9,210],"is":[10,23],"a":[11,33,208,235,243],"major":[12],"goal":[13,22],"of":[14,36,46,62,74,81,90,117,133,147,161,186,216,242,267,283],"artificial":[15],"intelligence.":[16],"A":[17],"key":[18],"ingredient":[19],"towards":[20,207],"this":[21,93,95,162],"scene":[24,87,101,231],"understanding:":[25],"ability":[27],"to":[28,100,113,136,178,188,196,199,203,225],"process":[29],"visual":[30,58],"imagery":[31],"into":[32,249],"concise":[34],"representation":[35,192],"depicted":[38],"scene,":[39,245],"including":[40,284],"identity,":[42],"position,":[43],"geometry":[45,241],"objects.":[47,250],"While":[48],"supervised":[49],"deep":[50],"learning":[51,116,193,215],"approaches":[52],"have":[53],"proven":[54],"effective":[55],"at":[56],"processing":[57],"inputs,":[59],"cost":[61,282],"supplying":[63],"human":[64,140],"annotations":[65],"for":[66],"training":[67],"quickly":[68],"becomes":[69],"infeasible":[70],"as":[71,142,144],"diversity":[73],"inputs":[76],"required":[79],"level":[80],"detail":[82],"increases,":[83],"putting":[84],"full":[85],"real-world":[86],"understanding":[88],"out":[89],"reach.":[91],"For":[92],"reason,":[94],"thesis":[96],"investigates":[97],"unsupervised":[98],"methods":[99,224],"understanding.":[102],"In":[103,260],"particular,":[104,261],"we":[105,129,157,175,182,212,262],"utilize":[106],"generative":[107],"models":[108],"with":[109],"structured":[110],"latent":[111],"variables":[112],"facilitate":[114],"object-based":[118],"representations.":[119],"We":[120,221,251],"start":[121],"our":[122],"investigation":[123],"in":[124,167],"an":[125],"autoencoding":[126],"setting,":[127,163,211],"where":[128,181],"highlight":[130],"capability":[132],"such":[134],"identify":[137,158],"objects":[138],"without":[139],"supervision,":[141],"well":[143],"advantages":[146],"integrating":[148],"tractable":[149],"components":[150],"within":[151],"them.":[152],"At":[153],"same":[155],"time,":[156],"some":[159],"limitations":[160],"which":[164,237],"prevent":[165],"success":[166],"more":[168],"visually":[169],"complex":[170],"environments.":[171],"Based":[172],"on":[173,264],"this,":[174],"then":[176],"turn":[177],"video":[179],"data,":[180],"leverage":[183],"prediction":[185],"dynamics":[187],"both":[189],"regularize":[190],"task":[194],"enable":[197],"applications":[198],"reinforcement":[200],"learning.":[201],"Finally,":[202],"take":[204],"another":[205],"step":[206],"investigate":[213],"representations":[217],"encoding":[218],"3D":[219,230,272],"geometry.":[220],"discuss":[222],"various":[223,277],"encode":[226],"learn":[228],"about":[229],"structure,":[232],"present":[234],"model":[236,285],"simultaneously":[238],"infers":[239],"given":[244],"segments":[247],"it":[248],"conclude":[252],"by":[253],"discussing":[254],"future":[255],"challenges":[256],"lessons":[258],"learned.":[259],"touch":[263],"challenge":[266],"modelling":[268],"uncertainty":[269],"when":[270],"inferring":[271],"geometry,":[273],"tradeoffs":[275],"data":[278],"sources,":[279],"structure.":[286]},"counts_by_year":[],"updated_date":"2026-02-26T08:16:20.718346","created_date":"2025-10-10T00:00:00"}
