{"id":"https://openalex.org/W4387969604","doi":"https://doi.org/10.1145/3581783.3612096","title":"Constructing Holistic Spatio-Temporal Scene Graph for Video Semantic Role Labeling","display_name":"Constructing Holistic Spatio-Temporal Scene Graph for Video Semantic Role Labeling","publication_year":2023,"publication_date":"2023-10-26","ids":{"openalex":"https://openalex.org/W4387969604","doi":"https://doi.org/10.1145/3581783.3612096"},"language":"en","primary_location":{"id":"doi:10.1145/3581783.3612096","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3581783.3612096","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 31st ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5029336400","display_name":"Yu Zhao","orcid":"https://orcid.org/0000-0001-8714-4151"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yu Zhao","raw_affiliation_strings":["Tianjin University, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"Tianjin University, Tianjin, China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055815455","display_name":"Hao Fei","orcid":null},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Hao Fei","raw_affiliation_strings":["National University of Singapore, Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"National University of Singapore, Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023326846","display_name":"Yixin Cao","orcid":"https://orcid.org/0000-0001-7157-6961"},"institutions":[{"id":"https://openalex.org/I79891267","display_name":"Singapore Management University","ror":"https://ror.org/050qmg959","country_code":"SG","type":"education","lineage":["https://openalex.org/I79891267"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Yixin Cao","raw_affiliation_strings":["Singapore Management University, Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"Singapore Management University, Singapore, Singapore","institution_ids":["https://openalex.org/I79891267"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101922939","display_name":"Bobo Li","orcid":"https://orcid.org/0000-0002-0513-5540"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bobo Li","raw_affiliation_strings":["Wuhan University, Wuhan, Chile"],"affiliations":[{"raw_affiliation_string":"Wuhan University, Wuhan, Chile","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004953265","display_name":"Meishan Zhang","orcid":"https://orcid.org/0000-0001-6335-1340"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Meishan Zhang","raw_affiliation_strings":["Harbin Institute of Technology (Shenzhen), Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology (Shenzhen), Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027428789","display_name":"Jianguo Wei","orcid":"https://orcid.org/0000-0002-8964-9759"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianguo Wei","raw_affiliation_strings":["Tianjin University, Tianjin, Chile"],"affiliations":[{"raw_affiliation_string":"Tianjin University, Tianjin, Chile","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103153167","display_name":"Min Zhang","orcid":"https://orcid.org/0000-0003-4659-1822"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Min Zhang","raw_affiliation_strings":["Harbin Institute of Technology (Shenzhen), Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology (Shenzhen), Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5089404640","display_name":"Tat\u2010Seng Chua","orcid":"https://orcid.org/0000-0001-6097-7807"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Tat-Seng Chua","raw_affiliation_strings":["National University of Singapore, Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"National University of Singapore, Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5029336400"],"corresponding_institution_ids":["https://openalex.org/I162868743"],"apc_list":null,"apc_paid":null,"fwci":3.7953,"has_fulltext":false,"cited_by_count":31,"citation_normalized_percentile":{"value":0.94960673,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"5281","last_page":"5291"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10515","display_name":"Cancer-related molecular mechanisms research","score":0.9901000261306763,"subfield":{"id":"https://openalex.org/subfields/1306","display_name":"Cancer Research"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8396727442741394},{"id":"https://openalex.org/keywords/scene-graph","display_name":"Scene graph","score":0.5488324165344238},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.5453798174858093},{"id":"https://openalex.org/keywords/event","display_name":"Event (particle physics)","score":0.5388407707214355},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.5049839615821838},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5044125318527222},{"id":"https://openalex.org/keywords/salient","display_name":"Salient","score":0.49993371963500977},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.4793304204940796},{"id":"https://openalex.org/keywords/event-structure","display_name":"Event structure","score":0.46997588872909546},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.239007830619812},{"id":"https://openalex.org/keywords/rendering","display_name":"Rendering (computer graphics)","score":0.09253212809562683}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8396727442741394},{"id":"https://openalex.org/C179372163","wikidata":"https://www.wikidata.org/wiki/Q1406181","display_name":"Scene graph","level":3,"score":0.5488324165344238},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.5453798174858093},{"id":"https://openalex.org/C2779662365","wikidata":"https://www.wikidata.org/wiki/Q5416694","display_name":"Event (particle physics)","level":2,"score":0.5388407707214355},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.5049839615821838},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5044125318527222},{"id":"https://openalex.org/C2780719617","wikidata":"https://www.wikidata.org/wiki/Q1030752","display_name":"Salient","level":2,"score":0.49993371963500977},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.4793304204940796},{"id":"https://openalex.org/C2777810175","wikidata":"https://www.wikidata.org/wiki/Q5416730","display_name":"Event structure","level":2,"score":0.46997588872909546},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.239007830619812},{"id":"https://openalex.org/C205711294","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Rendering (computer graphics)","level":2,"score":0.09253212809562683},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3581783.3612096","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3581783.3612096","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 31st ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":46,"referenced_works":["https://openalex.org/W2007291639","https://openalex.org/W2423576022","https://openalex.org/W2507397568","https://openalex.org/W2512435841","https://openalex.org/W2579549467","https://openalex.org/W2766863698","https://openalex.org/W2970290899","https://openalex.org/W2981820411","https://openalex.org/W3033633540","https://openalex.org/W3034679267","https://openalex.org/W3034984754","https://openalex.org/W3035017890","https://openalex.org/W3093206850","https://openalex.org/W3110014757","https://openalex.org/W3118264630","https://openalex.org/W3134294468","https://openalex.org/W3141198567","https://openalex.org/W3173211653","https://openalex.org/W3173293897","https://openalex.org/W3174377922","https://openalex.org/W3175824375","https://openalex.org/W3181951703","https://openalex.org/W3186621246","https://openalex.org/W3197858989","https://openalex.org/W3217412301","https://openalex.org/W4200630531","https://openalex.org/W4224245411","https://openalex.org/W4283271696","https://openalex.org/W4283801188","https://openalex.org/W4285212262","https://openalex.org/W4292433237","https://openalex.org/W4304013653","https://openalex.org/W4304092645","https://openalex.org/W4308974728","https://openalex.org/W4312354721","https://openalex.org/W4312434878","https://openalex.org/W4312561757","https://openalex.org/W4312900708","https://openalex.org/W4313137923","https://openalex.org/W4366957502","https://openalex.org/W4367311088","https://openalex.org/W4382461696","https://openalex.org/W4382999126","https://openalex.org/W6601968593","https://openalex.org/W6602426847","https://openalex.org/W6818308342"],"related_works":["https://openalex.org/W82416983","https://openalex.org/W3202383513","https://openalex.org/W3006654357","https://openalex.org/W2948564404","https://openalex.org/W2101248173","https://openalex.org/W2092421147","https://openalex.org/W2367735043","https://openalex.org/W4318061845","https://openalex.org/W2143641004","https://openalex.org/W2740945385"],"abstract_inverted_index":{"As":[0],"one":[1],"of":[2,55,64,101,184,218,221,235],"the":[3,17,25,30,53,61,84,94,107,124,127,132,155,169,190,198,206,219],"core":[4],"video":[5,65,237],"semantic":[6,135],"understanding":[7,217,238],"tasks,":[8],"Video":[9],"Semantic":[10],"Role":[11],"Labeling":[12],"(VidSRL)":[13],"aims":[14],"to":[15,48,122,153,230],"detect":[16],"salient":[18],"events":[19],"from":[20],"given":[21],"videos,":[22],"by":[23],"recognizing":[24],"predict-argument":[26],"event":[27,134],"structures":[28],"and":[29,60,98,131,162],"interrelationships":[31],"between":[32,126],"events.":[33],"While":[34],"recent":[35],"endeavors":[36],"have":[37],"put":[38],"forth":[39],"methods":[40],"for":[41,103,214],"VidSRL,":[42],"they":[43],"can":[44,173],"be":[45],"mostly":[46],"subject":[47],"two":[49],"key":[50],"drawbacks,":[51],"including":[52],"lack":[54],"fine-grained":[56,95],"spatial":[57,96],"scene":[58,77,87,129],"perception":[59],"insufficiently":[62],"modeling":[63],"temporality.":[66],"Towards":[67],"this":[68,70],"end,":[69],"work":[71],"explores":[72],"a":[73,111,215,232],"novel":[74],"holistic":[75],"spatio-temporal":[76],"graph":[78,88,145],"(namely":[79],"HostSG)":[80],"representation":[81,172,226],"based":[82],"on":[83],"existing":[85],"dynamic":[86],"structures,":[89],"which":[90],"well":[91],"model":[92],"both":[93],"semantics":[97],"temporal":[99],"dynamics":[100],"videos":[102],"VidSRL.":[104],"Built":[105],"upon":[106],"HostSG,":[108],"we":[109],"present":[110],"nichetargeting":[112],"VidSRL":[113,185],"framework.":[114],"A":[115],"scene-event":[116,142],"mapping":[117],"mechanism":[118],"is":[119],"first":[120],"designed":[121],"bridge":[123],"gap":[125],"underlying":[128],"structure":[130,151,171],"high-level":[133],"structure,":[136],"resulting":[137],"in":[138],"an":[139],"overall":[140,170],"hierarchical":[141],"(termed":[143],"ICE)":[144],"structure.":[146],"We":[147],"further":[148],"perform":[149],"iterative":[150],"refinement":[152],"optimize":[154],"ICE":[156],"graph,":[157],"e.g.,":[158],"filtering":[159],"noisy":[160],"branches":[161],"newly":[163],"building":[164],"informative":[165],"connections,":[166],"such":[167],"that":[168],"best":[174],"coincide":[175],"with":[176],"end":[177],"task":[178],"demand.":[179],"Finally,":[180],"three":[181],"subtask":[182],"predictions":[183],"are":[186,212],"jointly":[187],"decoded,":[188],"where":[189],"end-to-end":[191],"paradigm":[192],"effectively":[193],"avoids":[194],"error":[195],"propagation.":[196],"On":[197],"benchmark":[199],"dataset,":[200],"our":[201,222],"framework":[202],"boosts":[203],"significantly":[204],"over":[205],"current":[207],"best-performing":[208],"model.":[209],"Further":[210],"analyses":[211],"shown":[213],"better":[216],"advances":[220],"methods.":[223],"Our":[224],"HostSG":[225],"shows":[227],"greater":[228],"potential":[229],"facilitate":[231],"broader":[233],"range":[234],"other":[236],"tasks.":[239]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":9},{"year":2024,"cited_by_count":21}],"updated_date":"2026-03-12T08:34:05.389933","created_date":"2025-10-10T00:00:00"}
