{"id":"https://openalex.org/W4295135076","doi":"https://doi.org/10.1145/3538533","title":"Guided Graph Attention Learning for Video-Text Matching","display_name":"Guided Graph Attention Learning for Video-Text Matching","publication_year":2022,"publication_date":"2022-06-30","ids":{"openalex":"https://openalex.org/W4295135076","doi":"https://doi.org/10.1145/3538533"},"language":"en","primary_location":{"id":"doi:10.1145/3538533","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3538533","pdf_url":null,"source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100654289","display_name":"Kunpeng Li","orcid":"https://orcid.org/0000-0001-5805-793X"},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kunpeng Li","raw_affiliation_strings":["Northeastern University, Boston, Massachusetts, USA"],"raw_orcid":"https://orcid.org/0000-0001-5805-793X","affiliations":[{"raw_affiliation_string":"Northeastern University, Boston, Massachusetts, USA","institution_ids":["https://openalex.org/I12912129"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102970528","display_name":"Chang Liu","orcid":"https://orcid.org/0000-0002-0219-4748"},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chang Liu","raw_affiliation_strings":["Northeastern University, Boston, Massachusetts, USA"],"raw_orcid":"https://orcid.org/0000-0002-0219-4748","affiliations":[{"raw_affiliation_string":"Northeastern University, Boston, Massachusetts, USA","institution_ids":["https://openalex.org/I12912129"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5097501631","display_name":"Mike Stopa","orcid":null},"institutions":[{"id":"https://openalex.org/I4210133775","display_name":"Konica Minolta (United States)","ror":"https://ror.org/03zv4kd57","country_code":"US","type":"company","lineage":["https://openalex.org/I4210133775","https://openalex.org/I4210151549"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mike Stopa","raw_affiliation_strings":["Konica Minolta, San Mateo, California, USA"],"raw_orcid":"https://orcid.org/0000-0002-1418-2437","affiliations":[{"raw_affiliation_string":"Konica Minolta, San Mateo, California, USA","institution_ids":["https://openalex.org/I4210133775"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001149674","display_name":"Jun Amano","orcid":"https://orcid.org/0000-0002-9653-794X"},"institutions":[{"id":"https://openalex.org/I4210133775","display_name":"Konica Minolta (United States)","ror":"https://ror.org/03zv4kd57","country_code":"US","type":"company","lineage":["https://openalex.org/I4210133775","https://openalex.org/I4210151549"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jun Amano","raw_affiliation_strings":["Konica Minolta, San Mateo, California, USA"],"raw_orcid":"https://orcid.org/0000-0002-9653-794X","affiliations":[{"raw_affiliation_string":"Konica Minolta, San Mateo, California, USA","institution_ids":["https://openalex.org/I4210133775"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5005819096","display_name":"Yun Fu","orcid":"https://orcid.org/0000-0002-5098-2853"},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yun Fu","raw_affiliation_strings":["Northeastern University, Boston, Massachusetts, USA"],"raw_orcid":"https://orcid.org/0000-0002-5098-2853","affiliations":[{"raw_affiliation_string":"Northeastern University, Boston, Massachusetts, USA","institution_ids":["https://openalex.org/I12912129"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.6092,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.67530574,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":"18","issue":"2s","first_page":"1","last_page":"23"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9950000047683716,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9927999973297119,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8293619155883789},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.6143285036087036},{"id":"https://openalex.org/keywords/interpretability","display_name":"Interpretability","score":0.5169443488121033},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5104798078536987},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.46640175580978394},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.45593753457069397},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.4541064202785492},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.42445671558380127},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.42108285427093506},{"id":"https://openalex.org/keywords/semantic-matching","display_name":"Semantic matching","score":0.4104396402835846},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.38911595940589905},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3746521770954132},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3694477081298828},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.26754218339920044}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8293619155883789},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.6143285036087036},{"id":"https://openalex.org/C2781067378","wikidata":"https://www.wikidata.org/wiki/Q17027399","display_name":"Interpretability","level":2,"score":0.5169443488121033},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5104798078536987},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.46640175580978394},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.45593753457069397},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.4541064202785492},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.42445671558380127},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.42108285427093506},{"id":"https://openalex.org/C2778493491","wikidata":"https://www.wikidata.org/wiki/Q7449072","display_name":"Semantic matching","level":3,"score":0.4104396402835846},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.38911595940589905},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3746521770954132},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3694477081298828},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.26754218339920044},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3538533","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3538533","pdf_url":null,"source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7599999904632568,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":43,"referenced_works":["https://openalex.org/W398859631","https://openalex.org/W1522301498","https://openalex.org/W1527575280","https://openalex.org/W1924770834","https://openalex.org/W2062183687","https://openalex.org/W2107019937","https://openalex.org/W2158937425","https://openalex.org/W2277195237","https://openalex.org/W2333091651","https://openalex.org/W2526286384","https://openalex.org/W2560662850","https://openalex.org/W2592549418","https://openalex.org/W2725922920","https://openalex.org/W2745461083","https://openalex.org/W2808399042","https://openalex.org/W2890855364","https://openalex.org/W2911286998","https://openalex.org/W2952402334","https://openalex.org/W2954400107","https://openalex.org/W2962884155","https://openalex.org/W2963091558","https://openalex.org/W3023742835","https://openalex.org/W3033696290","https://openalex.org/W3048602385","https://openalex.org/W3081484346","https://openalex.org/W3128401049","https://openalex.org/W3130796238","https://openalex.org/W3152798676","https://openalex.org/W3157185431","https://openalex.org/W3167363556","https://openalex.org/W3173223111","https://openalex.org/W3175939205","https://openalex.org/W3212304713","https://openalex.org/W4210894218","https://openalex.org/W4214708455","https://openalex.org/W4221079634","https://openalex.org/W4249009392","https://openalex.org/W4285518780","https://openalex.org/W4286696412","https://openalex.org/W4310895557","https://openalex.org/W4394659899","https://openalex.org/W6754725917","https://openalex.org/W6764636640"],"related_works":["https://openalex.org/W2905433371","https://openalex.org/W4390569940","https://openalex.org/W2888392564","https://openalex.org/W4361193272","https://openalex.org/W4310278675","https://openalex.org/W4388422664","https://openalex.org/W2806259446","https://openalex.org/W2963326959","https://openalex.org/W4247136043","https://openalex.org/W4312407344"],"abstract_inverted_index":{"As":[0,173],"a":[1,12,28,57,75,170],"bridge":[2],"between":[3,102],"videos":[4,33,53],"and":[5,34,105,113,147,230],"natural":[6],"languages,":[7],"video-text":[8,166,187],"matching":[9],"has":[10,231],"been":[11],"hot":[13],"multimedia":[14,174],"research":[15],"topic":[16],"in":[17,56,65,176,227],"recent":[18,165],"years.":[19],"Such":[20],"cross-modal":[21],"retrieval":[22,167],"is":[23,41,123],"usually":[24],"achieved":[25],"by":[26,88,214],"learning":[27,87,128],"common":[29],"embedding":[30,86,140],"space":[31],"where":[32],"text":[35,68,153],"captions":[36],"are":[37,63],"directly":[38],"comparable.":[39],"It":[40],"still":[42],"challenging":[43],"because":[44],"existing":[45],"visual":[46],"representations":[47,188],"cannot":[48],"exploit":[49],"semantic":[50,60,92,145,225],"correlations":[51],"within":[52,94],"well,":[54],"resulting":[55],"mismatch":[58],"with":[59,152,169],"concepts":[61,93,146,226],"that":[62,135,161,189,219],"contained":[64],"the":[66,95,136,204,228,235],"corresponding":[67],"descriptions.":[69],"In":[70],"this":[71,119,130],"article,":[72],"we":[73,182,209],"propose":[74],"new":[76],"Guided":[77],"Graph":[78],"Attention":[79],"Learning":[80],"(GGAL)":[81],"model":[82,99],"to":[83,125,194],"enhance":[84],"video":[85,139,229],"capturing":[89],"important":[90],"region-level":[91],"spatiotemporal":[96],"space.":[97],"Our":[98],"builds":[100],"connections":[101],"object":[103],"regions":[104],"performs":[106],"hierarchical":[107,131],"graph":[108,132],"reasoning":[109],"on":[110,129,143,156,223,234,239],"both":[111],"frame-level":[112],"whole":[114],"video\u2013level":[115],"region":[116,244],"graphs.":[117,245],"During":[118],"process,":[120],"global":[121],"context":[122,236],"used":[124,158],"guide":[126],"attention":[127,211,233],"topology":[133],"so":[134],"learned":[137,186,213],"overall":[138],"can":[141,148,190],"focus":[142],"essential":[144],"be":[149,191],"better":[150],"aligned":[151],"captions.":[154],"Experiments":[155],"commonly":[157],"benchmarks":[159],"validate":[160,184],"GGAL":[162,185,215,220],"outperforms":[163],"many":[164],"methods":[168],"clear":[171],"margin.":[172],"data":[175,197],"dynamic":[177],"environments":[178],"becomes":[179],"critically":[180],"important,":[181],"also":[183],"generalized":[192],"well":[193],"unseen":[195],"out-of-domain":[196],"via":[198],"cross-dataset":[199],"evaluations.":[200],"To":[201],"further":[202],"investigate":[203],"interpretability":[205],"of":[206,242],"our":[207],"model,":[208],"visualize":[210],"weights":[212],"models.":[216],"We":[217],"find":[218],"successfully":[221],"focuses":[222],"key":[224],"complementary":[232],"parts":[237],"based":[238],"different":[240],"ways":[241],"building":[243]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
