{"id":"https://openalex.org/W4283361014","doi":"https://doi.org/10.1145/3512527.3531371","title":"Video2Subtitle: Matching Weakly-Synchronized Sequences via Dynamic Temporal Alignment","display_name":"Video2Subtitle: Matching Weakly-Synchronized Sequences via Dynamic Temporal Alignment","publication_year":2022,"publication_date":"2022-06-23","ids":{"openalex":"https://openalex.org/W4283361014","doi":"https://doi.org/10.1145/3512527.3531371"},"language":"en","primary_location":{"id":"doi:10.1145/3512527.3531371","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3512527.3531371","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2022 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5090338646","display_name":"Xue Ben","orcid":null},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Ben Xue","raw_affiliation_strings":["Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100767209","display_name":"Chenchen Liu","orcid":"https://orcid.org/0009-0007-0839-3290"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chenchen Liu","raw_affiliation_strings":["Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5028877572","display_name":"Yadong Mu","orcid":"https://orcid.org/0000-0001-7815-3750"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yadong Mu","raw_affiliation_strings":["Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5090338646"],"corresponding_institution_ids":["https://openalex.org/I20231570"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.04858549,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"342","last_page":"350"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/subtitle","display_name":"Subtitle","score":0.8015347123146057},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.790160059928894},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6599628925323486},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6356592178344727},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.6333659887313843},{"id":"https://openalex.org/keywords/synchronization","display_name":"Synchronization (alternating current)","score":0.5959656834602356},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.5048456788063049},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5038279891014099},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.4852714240550995},{"id":"https://openalex.org/keywords/duration","display_name":"Duration (music)","score":0.4510780870914459},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3810308575630188},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3329313397407532},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.10058143734931946}],"concepts":[{"id":"https://openalex.org/C2780364048","wikidata":"https://www.wikidata.org/wiki/Q204028","display_name":"Subtitle","level":2,"score":0.8015347123146057},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.790160059928894},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6599628925323486},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6356592178344727},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.6333659887313843},{"id":"https://openalex.org/C2778562939","wikidata":"https://www.wikidata.org/wiki/Q1298791","display_name":"Synchronization (alternating current)","level":3,"score":0.5959656834602356},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.5048456788063049},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5038279891014099},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.4852714240550995},{"id":"https://openalex.org/C112758219","wikidata":"https://www.wikidata.org/wiki/Q16038819","display_name":"Duration (music)","level":2,"score":0.4510780870914459},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3810308575630188},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3329313397407532},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.10058143734931946},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C124952713","wikidata":"https://www.wikidata.org/wiki/Q8242","display_name":"Literature","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3512527.3531371","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3512527.3531371","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2022 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.6000000238418579,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W877909479","https://openalex.org/W2077811903","https://openalex.org/W2108598243","https://openalex.org/W2116261113","https://openalex.org/W2245493112","https://openalex.org/W2250539671","https://openalex.org/W2410961113","https://openalex.org/W2425121537","https://openalex.org/W2526479943","https://openalex.org/W2565656701","https://openalex.org/W2586337417","https://openalex.org/W2732026016","https://openalex.org/W2808399042","https://openalex.org/W2893436174","https://openalex.org/W2914699769","https://openalex.org/W2963184176","https://openalex.org/W2963293463","https://openalex.org/W2963446712","https://openalex.org/W2963524571","https://openalex.org/W2963541336","https://openalex.org/W2963609956","https://openalex.org/W2963890755","https://openalex.org/W2975813532","https://openalex.org/W2979157532","https://openalex.org/W2981851019","https://openalex.org/W2984008963","https://openalex.org/W3035356601","https://openalex.org/W3104862079","https://openalex.org/W3207090548","https://openalex.org/W4247924304","https://openalex.org/W4290987481","https://openalex.org/W6629651325","https://openalex.org/W6702248584"],"related_works":["https://openalex.org/W1950334511","https://openalex.org/W3119551990","https://openalex.org/W2351264416","https://openalex.org/W3004133161","https://openalex.org/W4311609618","https://openalex.org/W2952626934","https://openalex.org/W2370494932","https://openalex.org/W2975517425","https://openalex.org/W2349976842","https://openalex.org/W3172548481"],"abstract_inverted_index":{"This":[0,78],"paper":[1],"investigates":[2],"a":[3,26,30,44,82,99,117,166,199],"new":[4,59],"research":[5],"task":[6,17],"in":[7,43,187],"multimedia":[8],"analysis,":[9],"dubbed":[10],"as":[11,206],"Video2Subtitle.":[12],"The":[13,181],"goal":[14],"of":[15,40,84,108,120,131,147,173,203],"this":[16,89,162],"is":[18,46,76],"to":[19],"finding":[20],"the":[21,37,54,103,148,170],"most":[22],"plausible":[23],"subtitle":[24,45,66,114,208],"from":[25,128],"large":[27],"pool":[28],"for":[29,102,140,209],"querying":[31],"video":[32,63,111],"clip.":[33],"We":[34],"assume":[35],"that":[36],"temporal":[38,145,179],"duration":[39,119],"each":[41],"sentence":[42],"unknown.":[47],"Compared":[48],"with":[49,116,169,189],"existing":[50],"cross-modal":[51,192],"matching":[52,85,193],"tasks,":[53],"proposed":[55,182],"Video2Subtitle":[56,80,104,142],"confronts":[57],"several":[58,190],"challenges.":[60],"In":[61,88],"particular,":[62],"frames":[64],"/":[65,113,150],"sentences":[67],"are":[68,94,125],"temporally":[69],"ordered,":[70],"respectively,":[71],"yet":[72],"no":[73],"precise":[74],"synchronization":[75,146],"available.":[77],"casts":[79],"into":[81],"problem":[83],"weakly-synchronized":[86],"sequences.":[87],"work,":[90],"our":[91],"technical":[92],"contributions":[93],"two-fold.":[95],"First,":[96],"we":[97,164,196],"construct":[98],"large-scale":[100],"benchmark":[101],"task.":[105],"It":[106],"consists":[107],"about":[109],"100K":[110],"clip":[112],"pairs":[115],"full":[118],"759":[121],"hours.":[122],"All":[123],"data":[124],"automatically":[126],"trimmed":[127],"conversational":[129],"sub-parts":[130],"movies":[132],"and":[133,177],"youtube":[134],"videos.":[135,211],"Secondly,":[136],"an":[137],"ideal":[138],"algorithm":[139,168],"tackling":[141],"requires":[143],"both":[144],"visual":[149],"textual":[151],"sequences,":[152],"but":[153],"also":[154,197],"strong":[155],"semantic":[156],"consistency":[157],"between":[158],"two":[159],"modalities.":[160],"To":[161],"end,":[163],"propose":[165],"novel":[167],"key":[171],"traits":[172],"heterogeneous":[174],"multi-cue":[175],"fusion":[176],"dynamic":[178],"alignment.":[180],"method":[183],"demonstrates":[184],"excellent":[185],"performances":[186],"comparison":[188],"state-of-the-art":[191],"methods.":[194],"Additionally,":[195],"depict":[198],"few":[200],"interesting":[201],"applications":[202],"Video2Subtitle,":[204],"such":[205],"re-generating":[207],"given":[210]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
