{"id":"https://openalex.org/W4414938646","doi":"https://doi.org/10.1145/3728423.3759408","title":"Shot2Tactic-Caption: Multi-Scale Captioning of Badminton Videos for Tactical Understanding","display_name":"Shot2Tactic-Caption: Multi-Scale Captioning of Badminton Videos for Tactical Understanding","publication_year":2025,"publication_date":"2025-10-08","ids":{"openalex":"https://openalex.org/W4414938646","doi":"https://doi.org/10.1145/3728423.3759408"},"language":"en","primary_location":{"id":"doi:10.1145/3728423.3759408","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3728423.3759408","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 8th International ACM Workshop on Multimedia Content Analysis in Sports","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2510.14617","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5085090623","display_name":"Ning Ding","orcid":"https://orcid.org/0000-0002-3067-7341"},"institutions":[{"id":"https://openalex.org/I197274945","display_name":"Nagoya Institute of Technology","ror":"https://ror.org/055yf1005","country_code":"JP","type":"education","lineage":["https://openalex.org/I197274945"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Ning Ding","raw_affiliation_strings":["Nagoya Institute of Technology, Nagoya, Japan"],"affiliations":[{"raw_affiliation_string":"Nagoya Institute of Technology, Nagoya, Japan","institution_ids":["https://openalex.org/I197274945"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025530694","display_name":"Keisuke Fujii","orcid":"https://orcid.org/0000-0001-5487-4297"},"institutions":[{"id":"https://openalex.org/I60134161","display_name":"Nagoya University","ror":"https://ror.org/04chrp450","country_code":"JP","type":"education","lineage":["https://openalex.org/I60134161"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Keisuke Fujii","raw_affiliation_strings":["Nagoya University, Nagoya, Japan"],"affiliations":[{"raw_affiliation_string":"Nagoya University, Nagoya, Japan","institution_ids":["https://openalex.org/I60134161"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5068412717","display_name":"Toru Tamaki","orcid":"https://orcid.org/0000-0001-9712-7777"},"institutions":[{"id":"https://openalex.org/I197274945","display_name":"Nagoya Institute of Technology","ror":"https://ror.org/055yf1005","country_code":"JP","type":"education","lineage":["https://openalex.org/I197274945"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Toru Tamaki","raw_affiliation_strings":["Nagoya Institute of Technology, Nagoya, Japan"],"affiliations":[{"raw_affiliation_string":"Nagoya Institute of Technology, Nagoya, Japan","institution_ids":["https://openalex.org/I197274945"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5085090623"],"corresponding_institution_ids":["https://openalex.org/I197274945"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.29214925,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"105","last_page":"113"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9952999949455261,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.9670000076293945},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.5861999988555908},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5009999871253967},{"id":"https://openalex.org/keywords/one-shot","display_name":"One shot","score":0.3774000108242035},{"id":"https://openalex.org/keywords/shot","display_name":"Shot (pellet)","score":0.35850000381469727},{"id":"https://openalex.org/keywords/structuring","display_name":"Structuring","score":0.3562000095844269},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.34529998898506165}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.9670000076293945},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8245999813079834},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5861999988555908},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5009999871253967},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.47769999504089355},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4602999985218048},{"id":"https://openalex.org/C2992734406","wikidata":"https://www.wikidata.org/wiki/Q413267","display_name":"One shot","level":2,"score":0.3774000108242035},{"id":"https://openalex.org/C2778344882","wikidata":"https://www.wikidata.org/wiki/Q278938","display_name":"Shot (pellet)","level":2,"score":0.35850000381469727},{"id":"https://openalex.org/C2775945657","wikidata":"https://www.wikidata.org/wiki/Q381442","display_name":"Structuring","level":2,"score":0.3562000095844269},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3549000024795532},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.34529998898506165},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.34060001373291016},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.3212999999523163},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.2727999985218048},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.26809999346733093},{"id":"https://openalex.org/C89611455","wikidata":"https://www.wikidata.org/wiki/Q6804646","display_name":"Mechanism (biology)","level":2,"score":0.2648000121116638},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.2590000033378601}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3728423.3759408","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3728423.3759408","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 8th International ACM Workshop on Multimedia Content Analysis in Sports","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2510.14617","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2510.14617","pdf_url":"https://arxiv.org/pdf/2510.14617","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2510.14617","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2510.14617","pdf_url":"https://arxiv.org/pdf/2510.14617","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4414938646.pdf"},"referenced_works_count":22,"referenced_works":["https://openalex.org/W1956340063","https://openalex.org/W2101105183","https://openalex.org/W2108598243","https://openalex.org/W2147677681","https://openalex.org/W2194775991","https://openalex.org/W2600463316","https://openalex.org/W2798793675","https://openalex.org/W2963155035","https://openalex.org/W2963351448","https://openalex.org/W2963916161","https://openalex.org/W3122309393","https://openalex.org/W4230038347","https://openalex.org/W4285256271","https://openalex.org/W4298129869","https://openalex.org/W4309880838","https://openalex.org/W4385764955","https://openalex.org/W4385800687","https://openalex.org/W4387846231","https://openalex.org/W4393128748","https://openalex.org/W4393995244","https://openalex.org/W4399925796","https://openalex.org/W4402726946"],"related_works":[],"abstract_inverted_index":{"Tactical":[0],"understanding":[1],"in":[2,35,193],"badminton":[3,69],"involves":[4],"interpreting":[5],"not":[6,165],"only":[7,166],"individual":[8,44],"actions":[9,45,53],"but":[10,172],"also":[11,62,173],"how":[12,51],"tactics":[13,171],"are":[14,147,179],"dynamically":[15],"executed":[16,170],"over":[17,55],"time.":[18],"In":[19],"this":[20],"paper,":[21],"we":[22,110,133],"propose":[23],"\\textbf{Shot2Tactic-Caption},":[24],"a":[25,58,82,89,92,97,113,136],"novel":[26],"framework":[27,192],"for":[28],"semantic":[29],"and":[30,46,76,96,103,124,145,151,182,197,211,220],"temporal":[31],"multi-scale":[32],"video":[33],"captioning":[34,70],"badminton,":[36],"capable":[37],"of":[38,190],"generating":[39,194],"shot-level":[40],"captions":[41,48,75],"that":[42,49,117,178,203,212],"describe":[43,168],"tactic-level":[47],"capture":[50,175],"these":[52],"unfold":[54],"time":[56],"within":[57],"tactical":[59,176],"execution.":[60],"We":[61],"introduce":[63,112],"the":[64,67,141,154,188,204],"Shot2Tactic-Caption":[65,80],"Dataset,":[66],"first":[68],"dataset":[71],"containing":[72],"5,494":[73],"shot":[74,102,196],"544":[77],"tactic":[78,104,108,120,122,125,131,143,198,222],"captions.":[79,105,199],"adopts":[81],"dual-branch":[83],"design,":[84],"with":[85],"both":[86,195],"branches":[87],"including":[88],"visual":[90],"encoder,":[91,95],"spatio-temporal":[93,206],"Transformer":[94],"Transformer-based":[98],"decoder":[99,155],"to":[100,167,174,217],"generate":[101],"To":[106],"support":[107],"captioning,":[109,132],"additionally":[111],"Tactic":[114],"Unit":[115],"Detector":[116],"identifies":[118],"valid":[119],"units,":[121],"types,":[123],"states":[126],"(e.g.,":[127],"Interrupt,":[128],"Resume).":[129],"For":[130],"further":[134],"incorporate":[135],"shot-wise":[137,159,213],"prompt-guided":[138,160],"mechanism,":[139],"where":[140],"predicted":[142],"type":[144],"state":[146],"embedded":[148],"as":[149],"prompts":[150],"injected":[152],"into":[153],"via":[156],"cross-attention.":[157],"The":[158],"mechanism":[161],"enables":[162],"our":[163,191],"system":[164],"successfully":[169],"executions":[177],"temporarily":[180],"interrupted":[181],"later":[183],"resumed.":[184],"Experimental":[185],"results":[186],"demonstrate":[187],"effectiveness":[189],"Ablation":[200],"studies":[201],"show":[202],"ResNet50-based":[205],"encoder":[207],"outperforms":[208],"other":[209],"variants,":[210],"prompt":[214],"structuring":[215],"leads":[216],"more":[218],"coherent":[219],"accurate":[221],"captioning.":[223]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-10T00:00:00"}
