{"id":"https://openalex.org/W4403791241","doi":"https://doi.org/10.1145/3664647.3681061","title":"Open-Vocabulary Video Scene Graph Generation via Union-aware Semantic Alignment","display_name":"Open-Vocabulary Video Scene Graph Generation via Union-aware Semantic Alignment","publication_year":2024,"publication_date":"2024-10-26","ids":{"openalex":"https://openalex.org/W4403791241","doi":"https://doi.org/10.1145/3664647.3681061"},"language":"en","primary_location":{"id":"doi:10.1145/3664647.3681061","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3664647.3681061","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5020295861","display_name":"Ziyue Wu","orcid":"https://orcid.org/0000-0001-5015-0232"},"institutions":[{"id":"https://openalex.org/I136765683","display_name":"Tianjin University of Technology","ror":"https://ror.org/00zbe0w13","country_code":"CN","type":"education","lineage":["https://openalex.org/I136765683"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Ziyue Wu","raw_affiliation_strings":["Tianjin University of Technology, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"Tianjin University of Technology, Tianjin, China","institution_ids":["https://openalex.org/I136765683"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014526931","display_name":"Junyu Gao","orcid":"https://orcid.org/0000-0002-8105-5497"},"institutions":[{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Junyu Gao","raw_affiliation_strings":["MAIS, Institute of Automation, CAS &amp; School of Artificial Intelligence, UCAS, Beijing, China"],"affiliations":[{"raw_affiliation_string":"MAIS, Institute of Automation, CAS &amp; School of Artificial Intelligence, UCAS, Beijing, China","institution_ids":["https://openalex.org/I4210112150"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5022636178","display_name":"Changsheng Xu","orcid":"https://orcid.org/0000-0001-8343-9665"},"institutions":[{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Changsheng Xu","raw_affiliation_strings":["MAIS, Institute of Automation, CAS &amp; School of Artificial Intelligence, UCAS, &amp; Peng Cheng Laboratory, Beijing, China"],"affiliations":[{"raw_affiliation_string":"MAIS, Institute of Automation, CAS &amp; School of Artificial Intelligence, UCAS, &amp; Peng Cheng Laboratory, Beijing, China","institution_ids":["https://openalex.org/I4210136793","https://openalex.org/I4210112150"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5020295861"],"corresponding_institution_ids":["https://openalex.org/I136765683"],"apc_list":null,"apc_paid":null,"fwci":1.3121,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.82188181,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"8566","last_page":"8575"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.79178786277771},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.620475172996521},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5268746018409729},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.5014610290527344},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.48205265402793884},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3661290407180786},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.1479133665561676},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.11684229969978333}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.79178786277771},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.620475172996521},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5268746018409729},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.5014610290527344},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.48205265402793884},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3661290407180786},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.1479133665561676},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.11684229969978333},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3664647.3681061","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3664647.3681061","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":44,"referenced_works":["https://openalex.org/W2077069816","https://openalex.org/W2117539524","https://openalex.org/W2579549467","https://openalex.org/W2765137706","https://openalex.org/W2904378456","https://openalex.org/W2951323451","https://openalex.org/W2954137266","https://openalex.org/W2962766617","https://openalex.org/W2963518342","https://openalex.org/W2963536419","https://openalex.org/W2981385984","https://openalex.org/W3016459781","https://openalex.org/W3034221024","https://openalex.org/W3035503132","https://openalex.org/W3035517717","https://openalex.org/W3043840704","https://openalex.org/W3093028502","https://openalex.org/W3109142545","https://openalex.org/W3118923280","https://openalex.org/W3162694035","https://openalex.org/W3170622629","https://openalex.org/W3173859428","https://openalex.org/W3180463990","https://openalex.org/W3193302808","https://openalex.org/W3193902142","https://openalex.org/W3205572000","https://openalex.org/W3206633059","https://openalex.org/W3217340782","https://openalex.org/W4200630194","https://openalex.org/W4212774754","https://openalex.org/W4214879921","https://openalex.org/W4293733630","https://openalex.org/W4312465143","https://openalex.org/W4312873085","https://openalex.org/W4312890493","https://openalex.org/W4312956471","https://openalex.org/W4313186260","https://openalex.org/W4385572897","https://openalex.org/W4386066010","https://openalex.org/W4386071767","https://openalex.org/W4386432237","https://openalex.org/W4387968099","https://openalex.org/W4388756793","https://openalex.org/W4391216149"],"related_works":["https://openalex.org/W2349784553","https://openalex.org/W3022596247","https://openalex.org/W2601444686","https://openalex.org/W4307058054","https://openalex.org/W4292238148","https://openalex.org/W4323660495","https://openalex.org/W2385319785","https://openalex.org/W2900827440","https://openalex.org/W3167549738","https://openalex.org/W3204019825"],"abstract_inverted_index":{"Video":[0],"Scene":[1],"Graph":[2],"Generation":[3],"(VidSGG)":[4],"plays":[5],"a":[6,91,114,120,141],"crucial":[7],"role":[8],"in":[9,34,85],"various":[10],"visual-language":[11],"tasks":[12],"by":[13],"providing":[14],"accessible":[15],"structured":[16],"visual":[17,51,78,92,127],"relation":[18,55,82,151,159],"knowledge.":[19],"However,":[20],"the":[21,38,48,75,86,101,149],"requirement":[22],"of":[23,27,44],"annotating":[24],"all":[25],"categories":[26],"prevailing":[28],"VidSGG":[29,46,65],"methods":[30],"limits":[31],"their":[32],"application":[33],"real-world":[35],"scenarios.":[36],"Despite":[37],"popular":[39],"VLMs":[40],"facilitating":[41],"preliminary":[42],"exploration":[43],"open-vocabulary":[45,98],"tasks,":[47],"correspondence":[49],"between":[50,77,124],"union":[52,79,128,137,155],"regions":[53,80],"and":[54,81,100,130],"predicates":[56],"is":[57,94,145],"usually":[58],"ignored.":[59],"Therefore,":[60],"we":[61,111],"propose":[62],"an":[63],"Open-vocabulary":[64],"framework":[66],"named":[67],"Union-Aware":[68],"Semantic":[69],"Alignment":[70],"Network":[71],"(UASAN)":[72],"to":[73,96,103,118,134,147],"explore":[74],"alignment":[76,143],"predicate":[83],"concepts":[84],"same":[87],"semantic":[88,122],"space.":[89],"Specifically,":[90],"refiner":[93],"designed":[95],"acquire":[97],"knowledge":[99],"ability":[102],"bridge":[104],"different":[105],"modalities.":[106],"To":[107],"achieve":[108,119],"better":[109],"alignment,":[110],"first":[112],"design":[113],"semantic-aware":[115,136],"context":[116],"encoder":[117],"comprehensive":[121],"interaction":[123],"object":[125],"trajectories,":[126],"regions,":[129],"trajectory":[131],"motion":[132],"information":[133],"obtain":[135],"region":[138,156],"representations.":[139],"Then,":[140],"union-relation":[142],"decoder":[144],"utilized":[146],"generate":[148],"discriminative":[150],"token":[152],"for":[153,157],"each":[154],"final":[158],"prediction.":[160]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":3}],"updated_date":"2025-12-19T19:40:27.379048","created_date":"2025-10-10T00:00:00"}
