{"id":"https://openalex.org/W4390357571","doi":"https://doi.org/10.1109/tip.2023.3345652","title":"Spatial\u2013Temporal Knowledge-Embedded Transformer for Video Scene Graph Generation","display_name":"Spatial\u2013Temporal Knowledge-Embedded Transformer for Video Scene Graph Generation","publication_year":2023,"publication_date":"2023-12-28","ids":{"openalex":"https://openalex.org/W4390357571","doi":"https://doi.org/10.1109/tip.2023.3345652","pmid":"https://pubmed.ncbi.nlm.nih.gov/38153822"},"language":"en","primary_location":{"id":"doi:10.1109/tip.2023.3345652","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tip.2023.3345652","pdf_url":null,"source":{"id":"https://openalex.org/S4210173141","display_name":"IEEE Transactions on Image Processing","issn_l":"1057-7149","issn":["1057-7149","1941-0042"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Image Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5085835397","display_name":"Tao Pu","orcid":"https://orcid.org/0000-0001-9564-8288"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Tao Pu","raw_affiliation_strings":["School of Computer Science and Engineering, Sun Yat-sen University, Guangzhou, China"],"raw_orcid":"https://orcid.org/0000-0001-9564-8288","affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Sun Yat-sen University, Guangzhou, China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052027147","display_name":"Tianshui Chen","orcid":"https://orcid.org/0000-0002-5848-5624"},"institutions":[{"id":"https://openalex.org/I139024713","display_name":"Guangdong University of Technology","ror":"https://ror.org/04azbjn80","country_code":"CN","type":"education","lineage":["https://openalex.org/I139024713"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tianshui Chen","raw_affiliation_strings":["School of Information Engineering, Guangdong University of Technology, Guangzhou, China"],"raw_orcid":"https://orcid.org/0000-0002-5848-5624","affiliations":[{"raw_affiliation_string":"School of Information Engineering, Guangdong University of Technology, Guangzhou, China","institution_ids":["https://openalex.org/I139024713"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061828638","display_name":"Hefeng Wu","orcid":"https://orcid.org/0000-0002-2132-6515"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hefeng Wu","raw_affiliation_strings":["School of Computer Science and Engineering, Sun Yat-sen University, Guangzhou, China"],"raw_orcid":"https://orcid.org/0000-0002-2132-6515","affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Sun Yat-sen University, Guangzhou, China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081352580","display_name":"Yongyi Lu","orcid":"https://orcid.org/0000-0003-1398-9965"},"institutions":[{"id":"https://openalex.org/I139024713","display_name":"Guangdong University of Technology","ror":"https://ror.org/04azbjn80","country_code":"CN","type":"education","lineage":["https://openalex.org/I139024713"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yongyi Lu","raw_affiliation_strings":["School of Information Engineering, Guangdong University of Technology, Guangzhou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Information Engineering, Guangdong University of Technology, Guangzhou, China","institution_ids":["https://openalex.org/I139024713"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100412937","display_name":"Liang Lin","orcid":"https://orcid.org/0000-0003-2248-3755"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Liang Lin","raw_affiliation_strings":["School of Computer Science and Engineering, Sun Yat-sen University, Guangzhou, China"],"raw_orcid":"https://orcid.org/0000-0003-2248-3755","affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Sun Yat-sen University, Guangzhou, China","institution_ids":["https://openalex.org/I157773358"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5085835397"],"corresponding_institution_ids":["https://openalex.org/I157773358"],"apc_list":null,"apc_paid":null,"fwci":2.8364,"has_fulltext":false,"cited_by_count":25,"citation_normalized_percentile":{"value":0.92705563,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":"33","issue":null,"first_page":"556","last_page":"568"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7830886840820312},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.664857029914856},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.611029326915741},{"id":"https://openalex.org/keywords/spatial-relation","display_name":"Spatial relation","score":0.4835662543773651},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.41599732637405396},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.3435174226760864}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7830886840820312},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.664857029914856},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.611029326915741},{"id":"https://openalex.org/C27511587","wikidata":"https://www.wikidata.org/wiki/Q2178623","display_name":"Spatial relation","level":2,"score":0.4835662543773651},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.41599732637405396},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3435174226760864}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tip.2023.3345652","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tip.2023.3345652","pdf_url":null,"source":{"id":"https://openalex.org/S4210173141","display_name":"IEEE Transactions on Image Processing","issn_l":"1057-7149","issn":["1057-7149","1941-0042"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Image Processing","raw_type":"journal-article"},{"id":"pmid:38153822","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/38153822","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on image processing : a publication of the IEEE Signal Processing Society","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1295000622","display_name":null,"funder_award_id":"61836012","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6248067210","display_name":null,"funder_award_id":"2023A1515012845","funder_id":"https://openalex.org/F4320337111","funder_display_name":"Basic and Applied Basic Research Foundation of Guangdong Province"},{"id":"https://openalex.org/G629824844","display_name":null,"funder_award_id":"2021ZD0111601","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G7242000173","display_name":null,"funder_award_id":"62206060","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7301063938","display_name":null,"funder_award_id":"SL2022A04J01626","funder_id":"https://openalex.org/F4320337111","funder_display_name":"Basic and Applied Basic Research Foundation of Guangdong Province"},{"id":"https://openalex.org/G7308144093","display_name":null,"funder_award_id":"62272494","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7519033713","display_name":null,"funder_award_id":"2023A1515011374","funder_id":"https://openalex.org/F4320337111","funder_display_name":"Basic and Applied Basic Research Foundation of Guangdong Province"},{"id":"https://openalex.org/G7743370563","display_name":null,"funder_award_id":"23ptpy111","funder_id":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320329883","display_name":"Guangdong Province Key Laboratory of Information Security Technology","ror":null},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null},{"id":"https://openalex.org/F4320335787","display_name":"Fundamental Research Funds for the Central Universities","ror":null},{"id":"https://openalex.org/F4320337111","display_name":"Basic and Applied Basic Research Foundation of Guangdong Province","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":67,"referenced_works":["https://openalex.org/W2027963110","https://openalex.org/W2194775991","https://openalex.org/W2479423890","https://openalex.org/W2579549467","https://openalex.org/W2765137706","https://openalex.org/W2886970679","https://openalex.org/W2907214745","https://openalex.org/W2908510526","https://openalex.org/W2962785943","https://openalex.org/W2963052338","https://openalex.org/W2963150697","https://openalex.org/W2963514444","https://openalex.org/W2963536419","https://openalex.org/W2963649796","https://openalex.org/W2963938081","https://openalex.org/W2964015378","https://openalex.org/W2981385984","https://openalex.org/W2982247743","https://openalex.org/W2982515679","https://openalex.org/W2986758741","https://openalex.org/W3010593057","https://openalex.org/W3021735172","https://openalex.org/W3024031618","https://openalex.org/W3034538190","https://openalex.org/W3034679267","https://openalex.org/W3035503132","https://openalex.org/W3048602385","https://openalex.org/W3087020912","https://openalex.org/W3089555680","https://openalex.org/W3092802919","https://openalex.org/W3166712493","https://openalex.org/W3173599944","https://openalex.org/W3182902595","https://openalex.org/W3186621246","https://openalex.org/W3193767255","https://openalex.org/W3193902142","https://openalex.org/W3201861986","https://openalex.org/W3206041168","https://openalex.org/W3206818591","https://openalex.org/W3207001124","https://openalex.org/W3209229003","https://openalex.org/W3216466215","https://openalex.org/W4200498145","https://openalex.org/W4205492448","https://openalex.org/W4211103897","https://openalex.org/W4225930680","https://openalex.org/W4226060302","https://openalex.org/W4226104430","https://openalex.org/W4226342448","https://openalex.org/W4295036294","https://openalex.org/W4304080820","https://openalex.org/W4310638993","https://openalex.org/W4312271977","https://openalex.org/W4312465143","https://openalex.org/W4312561757","https://openalex.org/W4312690830","https://openalex.org/W4315473428","https://openalex.org/W4366352717","https://openalex.org/W4386071824","https://openalex.org/W4386072171","https://openalex.org/W4386757817","https://openalex.org/W6620707391","https://openalex.org/W6726873649","https://openalex.org/W6739901393","https://openalex.org/W6754725917","https://openalex.org/W6757817989","https://openalex.org/W6776977327"],"related_works":["https://openalex.org/W2772917594","https://openalex.org/W2036807459","https://openalex.org/W2058170566","https://openalex.org/W2755342338","https://openalex.org/W2166024367","https://openalex.org/W3116076068","https://openalex.org/W2229312674","https://openalex.org/W2951359407","https://openalex.org/W2079911747","https://openalex.org/W1969923398"],"abstract_inverted_index":{"Video":[0],"scene":[1,34],"graph":[2],"generation":[3],"(VidSGG)":[4],"aims":[5],"to":[6,72,100,134,145,161],"identify":[7],"objects":[8],"in":[9,116],"visual":[10,140],"scenes":[11],"and":[12,44,49,59,77,112,124,142,148,167,190],"infer":[13],"their":[14,41,50,168],"relationships":[15,51],"for":[16,157],"a":[17,24,37,84,117,180],"given":[18],"video.":[19],"It":[20],"requires":[21],"not":[22],"only":[23],"comprehensive":[25],"understanding":[26],"of":[27],"each":[28,57,158],"object":[29,47],"scattered":[30],"on":[31,192],"the":[32,91,96,130,137,143,163,185],"whole":[33],"but":[35],"also":[36],"deep":[38],"dive":[39],"into":[40,95],"temporal":[42,60,113,125],"motions":[43],"interactions.":[45],"Inherently,":[46],"pairs":[48],"enjoy":[52],"spatial":[53,110,123],"co-occurrence":[54,111],"correlations":[55,62,115],"within":[56],"image":[58],"consistency/transition":[61],"across":[63],"different":[64,193],"images,":[65],"which":[66],"can":[67],"serve":[68],"as":[69],"prior":[70,92],"knowledge":[71,94,144],"facilitate":[73],"VidSGG":[74],"model":[75],"learning":[76],"inference.":[78],"In":[79],"this":[80],"work,":[81],"we":[82,107,121,153],"propose":[83],"spatial-temporal":[85,93],"knowledge-embedded":[86,126],"transformer":[87],"(STKET)":[88],"that":[89,128,173],"incorporates":[90],"multi-head":[97,131],"cross-attention":[98,132],"mechanism":[99,133],"learn":[101,109],"more":[102],"representative":[103],"relationship":[104],"representations.":[105],"Specifically,":[106],"first":[108],"transition":[114],"statistical":[118],"manner.":[119],"Then,":[120],"design":[122],"layers":[127],"introduce":[129],"fully":[135],"explore":[136],"interaction":[138],"between":[139],"representation":[141],"generate":[146],"spatial-":[147],"temporal-embedded":[149],"representations,":[150],"respectively.":[151],"Finally,":[152],"aggregate":[154],"these":[155],"representations":[156],"subject-object":[159],"pair":[160],"predict":[162],"final":[164],"semantic":[165],"labels":[166],"relationships.":[169],"Extensive":[170],"experiments":[171],"show":[172],"STKET":[174],"outperforms":[175],"current":[176,196],"competing":[177],"algorithms":[178],"by":[179,187],"large":[181],"margin,":[182],"e.g.,":[183],"improving":[184],"mR@50":[186],"8.1%,":[188],"4.7%,":[189],"2.1%":[191],"settings":[194],"over":[195],"algorithms.":[197]},"counts_by_year":[{"year":2026,"cited_by_count":5},{"year":2025,"cited_by_count":13},{"year":2024,"cited_by_count":7}],"updated_date":"2026-05-28T09:10:13.091523","created_date":"2025-10-10T00:00:00"}
