{"id":"https://openalex.org/W4312571620","doi":"https://doi.org/10.1109/mlsp55214.2022.9943394","title":"From Object-Attribute-Relation Semantic Representation to Video Generation: A Multiple Variational Autoencoder Approach","display_name":"From Object-Attribute-Relation Semantic Representation to Video Generation: A Multiple Variational Autoencoder Approach","publication_year":2022,"publication_date":"2022-08-22","ids":{"openalex":"https://openalex.org/W4312571620","doi":"https://doi.org/10.1109/mlsp55214.2022.9943394"},"language":"en","primary_location":{"id":"doi:10.1109/mlsp55214.2022.9943394","is_oa":false,"landing_page_url":"https://doi.org/10.1109/mlsp55214.2022.9943394","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE 32nd International Workshop on Machine Learning for Signal Processing (MLSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5025137837","display_name":"Yiping Duan","orcid":"https://orcid.org/0000-0001-9638-7112"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]},{"id":"https://openalex.org/I4210156423","display_name":"National Engineering Research Center for Information Technology in Agriculture","ror":"https://ror.org/04c3j3t84","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210156423"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yiping Duan","raw_affiliation_strings":["Tsinghua University,Department of Electronic Engineering,Beijing,China","Beijing National Research Center for Information Science and Technology (BNRist), Beijing, China","Department of Electronic Engineering, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,Department of Electronic Engineering,Beijing,China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"Beijing National Research Center for Information Science and Technology (BNRist), Beijing, China","institution_ids":["https://openalex.org/I4210156423"]},{"raw_affiliation_string":"Department of Electronic Engineering, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100775678","display_name":"Mingzhe Li","orcid":"https://orcid.org/0000-0002-4707-7588"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]},{"id":"https://openalex.org/I4210156423","display_name":"National Engineering Research Center for Information Technology in Agriculture","ror":"https://ror.org/04c3j3t84","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210156423"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mingzhe Li","raw_affiliation_strings":["Tsinghua University,Department of Electronic Engineering,Beijing,China","Beijing National Research Center for Information Science and Technology (BNRist), Beijing, China","Department of Electronic Engineering, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,Department of Electronic Engineering,Beijing,China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"Beijing National Research Center for Information Science and Technology (BNRist), Beijing, China","institution_ids":["https://openalex.org/I4210156423"]},{"raw_affiliation_string":"Department of Electronic Engineering, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068419348","display_name":"Lijia Wen","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lijia Wen","raw_affiliation_strings":["Tsinghua University,Department of Electronic Engineering,Beijing,China","Department of Electronic Engineering, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,Department of Electronic Engineering,Beijing,China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"Department of Electronic Engineering, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010220478","display_name":"Qianqian Yang","orcid":"https://orcid.org/0000-0003-4747-9410"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qianqian Yang","raw_affiliation_strings":["College of Information Science and Electronic Engineering, Zhejiang University,Hangzhou,China","College of Information Science and Electronic Engineering, Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"College of Information Science and Electronic Engineering, Zhejiang University,Hangzhou,China","institution_ids":["https://openalex.org/I76130692"]},{"raw_affiliation_string":"College of Information Science and Electronic Engineering, Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101459082","display_name":"Xiaoming Tao","orcid":"https://orcid.org/0000-0002-8763-9338"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]},{"id":"https://openalex.org/I4210156423","display_name":"National Engineering Research Center for Information Technology in Agriculture","ror":"https://ror.org/04c3j3t84","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210156423"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaoming Tao","raw_affiliation_strings":["Tsinghua University,Department of Electronic Engineering,Beijing,China","Department of Electronic Engineering, Tsinghua University, Beijing, China","Beijing National Research Center for Information Science and Technology (BNRist), Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,Department of Electronic Engineering,Beijing,China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"Department of Electronic Engineering, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"Beijing National Research Center for Information Science and Technology (BNRist), Beijing, China","institution_ids":["https://openalex.org/I4210156423"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5025137837"],"corresponding_institution_ids":["https://openalex.org/I4210156423","https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":0.1006,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.40482669,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":"abs 1906 446","issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.9781000018119812,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.8707575798034668},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8283628821372986},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.6559834480285645},{"id":"https://openalex.org/keywords/relation","display_name":"Relation (database)","score":0.6070641279220581},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.6044007539749146},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5743733048439026},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.5593297481536865},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.5086256265640259},{"id":"https://openalex.org/keywords/fidelity","display_name":"Fidelity","score":0.4928012192249298},{"id":"https://openalex.org/keywords/probabilistic-latent-semantic-analysis","display_name":"Probabilistic latent semantic analysis","score":0.4881752133369446},{"id":"https://openalex.org/keywords/high-fidelity","display_name":"High fidelity","score":0.4683836102485657},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.45483607053756714},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.4434337913990021},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3726024031639099},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.3721051812171936},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.35636377334594727},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.24534165859222412},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.17367666959762573},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.1418479084968567}],"concepts":[{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.8707575798034668},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8283628821372986},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.6559834480285645},{"id":"https://openalex.org/C25343380","wikidata":"https://www.wikidata.org/wiki/Q277521","display_name":"Relation (database)","level":2,"score":0.6070641279220581},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.6044007539749146},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5743733048439026},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.5593297481536865},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.5086256265640259},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.4928012192249298},{"id":"https://openalex.org/C112933361","wikidata":"https://www.wikidata.org/wiki/Q2845258","display_name":"Probabilistic latent semantic analysis","level":2,"score":0.4881752133369446},{"id":"https://openalex.org/C113364801","wikidata":"https://www.wikidata.org/wiki/Q26674","display_name":"High fidelity","level":2,"score":0.4683836102485657},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.45483607053756714},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.4434337913990021},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3726024031639099},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3721051812171936},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.35636377334594727},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.24534165859222412},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.17367666959762573},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.1418479084968567},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/mlsp55214.2022.9943394","is_oa":false,"landing_page_url":"https://doi.org/10.1109/mlsp55214.2022.9943394","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE 32nd International Workshop on Machine Learning for Signal Processing (MLSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2820823932","display_name":null,"funder_award_id":"61925105,62171257","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7967913303","display_name":null,"funder_award_id":"2019YFB1803400","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W1901129140","https://openalex.org/W1959608418","https://openalex.org/W2561585794","https://openalex.org/W2752796333","https://openalex.org/W2962815021","https://openalex.org/W2963184176","https://openalex.org/W2963185199","https://openalex.org/W2963653352","https://openalex.org/W2963799213","https://openalex.org/W2963800363","https://openalex.org/W2965833116","https://openalex.org/W2971074500","https://openalex.org/W3002745024","https://openalex.org/W3090857244","https://openalex.org/W3135287914","https://openalex.org/W3186621246","https://openalex.org/W3198139956","https://openalex.org/W3204937802","https://openalex.org/W3209662401","https://openalex.org/W3212275823","https://openalex.org/W4288088427","https://openalex.org/W6639824700","https://openalex.org/W6640963894","https://openalex.org/W6753914649","https://openalex.org/W6762931180","https://openalex.org/W6769148693"],"related_works":["https://openalex.org/W4313443006","https://openalex.org/W2945374968","https://openalex.org/W4385452045","https://openalex.org/W4293777179","https://openalex.org/W2164070813","https://openalex.org/W2135608140","https://openalex.org/W2895525995","https://openalex.org/W4224231624","https://openalex.org/W2332512904","https://openalex.org/W2319626700"],"abstract_inverted_index":{"Video":[0],"generation":[1,16,59,158],"refers":[2],"to":[3,24,112,138,149,180],"synthesizing":[4],"high-resolution":[5],"video":[6,58,69,157],"from":[7],"latent":[8,20,73,115],"representations":[9,74],"or":[10],"features.":[11],"In":[12],"an":[13,76],"end-to-end":[14],"encoder-decoder":[15],"framework,":[17],"the":[18,38,89,105,114,122,151,156,162,165,170,181,185,188],"intermediate":[19],"representation":[21],"is":[22],"expected":[23],"contain":[25],"important":[26],"semantic":[27,72,131],"information":[28],"within":[29],"a":[30,52,68,97,147],"small":[31],"amount":[32],"of":[33,96,117,187],"structural":[34],"data,":[35],"such":[36],"that":[37],"generated":[39],"videos":[40,90],"have":[41],"high-fidelity":[42,86],"and":[43,81,99,104,121],"good":[44],"perceptual":[45],"quality.":[46],"With":[47],"these":[48,140],"considerations,":[49],"we":[50],"propose":[51],"multiple":[53,110,134],"variational":[54],"autoencoder":[55],"approach":[56],"for":[57],"with":[60,177],"object-attribute-relation":[61],"(OAR)":[62],"model.":[63],"The":[64],"proposed":[65,189],"framework":[66],"generates":[67],"by":[70,146,160],"decoding":[71],"in":[75,94],"OAR":[77,102],"pattern":[78],"(objects,":[79],"attributes":[80],"their":[82],"relations)":[83],"into":[84],"plausible":[85],"videos.":[87,153],"Specifically,":[88],"are":[91,127,136,143],"first":[92],"represented":[93],"terms":[95],"well-organized,":[98],"easily":[100],"parsed":[101],"structure":[103],"remaining":[106,123],"background.":[107],"We":[108,154],"use":[109],"encoders":[111],"learn":[113],"embeddings":[116],"objects,":[118],"attributes,":[119],"relations,":[120],"background":[124],"separately,":[125],"which":[126,142],"viewed":[128],"as":[129],"different":[130],"components.":[132],"Correspondingly,":[133],"decoders":[135],"used":[137],"reconstruct":[139],"components,":[141],"then":[144],"fused":[145],"UNet":[148],"generate":[150],"full":[152],"improve":[155],"quality":[159],"introducing":[161],"relations":[163],"between":[164],"objects.":[166],"Experimental":[167],"results":[168],"on":[169],"challenging":[171],"Google":[172],"Research":[173],"Football":[174],"dataset,":[175],"along":[176],"detailed":[178],"comparison":[179],"advanced":[182],"methods,":[183],"verify":[184],"effectiveness":[186],"framework.":[190]},"counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
