{"id":"https://openalex.org/W4387968404","doi":"https://doi.org/10.1145/3581783.3611870","title":"VTLayout: A Multi-Modal Approach for Video Text Layout","display_name":"VTLayout: A Multi-Modal Approach for Video Text Layout","publication_year":2023,"publication_date":"2023-10-26","ids":{"openalex":"https://openalex.org/W4387968404","doi":"https://doi.org/10.1145/3581783.3611870"},"language":"en","primary_location":{"id":"doi:10.1145/3581783.3611870","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3581783.3611870","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 31st ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5059504183","display_name":"Yuxuan Zhao","orcid":"https://orcid.org/0009-0001-1134-4808"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yuxuan Zhao","raw_affiliation_strings":["Tencent PCG, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tencent PCG, Beijing, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100663186","display_name":"Jin Ma","orcid":"https://orcid.org/0009-0005-5837-6144"},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]},{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jin Ma","raw_affiliation_strings":["University of Science and Technology of China, Beijing, China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China, Beijing, China","institution_ids":["https://openalex.org/I92403157","https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101500719","display_name":"Zhongang Qi","orcid":"https://orcid.org/0000-0001-8298-4063"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhongang Qi","raw_affiliation_strings":["Tencent PCG, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tencent PCG, Shenzhen, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077245362","display_name":"Zehua Xie","orcid":"https://orcid.org/0009-0005-9948-4790"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zehua Xie","raw_affiliation_strings":["Tencent PCG, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tencent PCG, Beijing, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037451526","display_name":"Yu Luo","orcid":"https://orcid.org/0009-0005-5766-5913"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yu Luo","raw_affiliation_strings":["Tencent PCG, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tencent PCG, Shenzhen, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019480511","display_name":"Qiusheng Kang","orcid":"https://orcid.org/0009-0004-6628-6213"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qiusheng Kang","raw_affiliation_strings":["Tencent PCG, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tencent PCG, Shenzhen, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102004349","display_name":"Ying Shan","orcid":"https://orcid.org/0000-0001-7673-8325"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ying Shan","raw_affiliation_strings":["Tencent PCG, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tencent PCG, Shenzhen, China","institution_ids":["https://openalex.org/I2250653659"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5059504183"],"corresponding_institution_ids":["https://openalex.org/I2250653659"],"apc_list":null,"apc_paid":null,"fwci":0.1235,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.42983072,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"2775","last_page":"2784"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8208094239234924},{"id":"https://openalex.org/keywords/automatic-summarization","display_name":"Automatic summarization","score":0.6488770842552185},{"id":"https://openalex.org/keywords/video-tracking","display_name":"Video tracking","score":0.5520020723342896},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4773545265197754},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.4514545202255249},{"id":"https://openalex.org/keywords/subtitle","display_name":"Subtitle","score":0.43841278553009033},{"id":"https://openalex.org/keywords/video-browsing","display_name":"Video browsing","score":0.4170806407928467},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.41589006781578064},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3490641117095947},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.341945081949234},{"id":"https://openalex.org/keywords/video-processing","display_name":"Video processing","score":0.2706097364425659}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8208094239234924},{"id":"https://openalex.org/C170858558","wikidata":"https://www.wikidata.org/wiki/Q1394144","display_name":"Automatic summarization","level":2,"score":0.6488770842552185},{"id":"https://openalex.org/C202474056","wikidata":"https://www.wikidata.org/wiki/Q1931635","display_name":"Video tracking","level":3,"score":0.5520020723342896},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4773545265197754},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.4514545202255249},{"id":"https://openalex.org/C2780364048","wikidata":"https://www.wikidata.org/wiki/Q204028","display_name":"Subtitle","level":2,"score":0.43841278553009033},{"id":"https://openalex.org/C2775856596","wikidata":"https://www.wikidata.org/wiki/Q25141683","display_name":"Video browsing","level":4,"score":0.4170806407928467},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.41589006781578064},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3490641117095947},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.341945081949234},{"id":"https://openalex.org/C65483669","wikidata":"https://www.wikidata.org/wiki/Q3536669","display_name":"Video processing","level":2,"score":0.2706097364425659},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3581783.3611870","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3581783.3611870","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 31st ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.8100000023841858}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W1995052126","https://openalex.org/W1998981432","https://openalex.org/W2008806374","https://openalex.org/W2108598243","https://openalex.org/W2194775991","https://openalex.org/W2606911419","https://openalex.org/W2806070179","https://openalex.org/W2807605148","https://openalex.org/W2962772269","https://openalex.org/W2978036638","https://openalex.org/W2982121298","https://openalex.org/W2997154779","https://openalex.org/W2998621280","https://openalex.org/W3004268082","https://openalex.org/W3089933987","https://openalex.org/W3093272873","https://openalex.org/W3104953317","https://openalex.org/W3106963860","https://openalex.org/W3181159501","https://openalex.org/W3202382261","https://openalex.org/W3205084812","https://openalex.org/W4221167941","https://openalex.org/W4300801092","https://openalex.org/W4304013646","https://openalex.org/W6602254124"],"related_works":["https://openalex.org/W4288365488","https://openalex.org/W2032408534","https://openalex.org/W2157968140","https://openalex.org/W2763381589","https://openalex.org/W170084132","https://openalex.org/W1993097576","https://openalex.org/W2910955834","https://openalex.org/W2016545890","https://openalex.org/W2622881578","https://openalex.org/W2406608628"],"abstract_inverted_index":{"The":[0],"rapid":[1],"explosion":[2],"of":[3,12,46,90,103,160,167,202],"video":[4,13,21,47,60,66,71,82,91,109,161,180,203,207],"distribution":[5],"is":[6,63,171,219],"accompanied":[7],"by":[8,112],"a":[9,106,142],"massive":[10],"amount":[11],"text,":[14],"which":[15,153],"encompasses":[16],"rich":[17],"information":[18],"about":[19],"the":[20,44,79,88,100,104,130,150,165,172,200],"content.":[22],"While":[23],"previous":[24],"research":[25],"has":[26,49],"primarily":[27],"focused":[28],"on":[29,136,179],"text":[30,35,48,55,61,92,110,114,162,177,190,204],"extraction":[31],"from":[32],"videos":[33],"like":[34],"detection,":[36],"tracking,":[37],"recognition":[38],"and":[39,73,84,116,125,157,192,209],"end":[40,42],"to":[41,148,175,214],"spotting,":[43],"layout":[45,62,93,101,133,151,178,186,205],"received":[50],"limited":[51],"attention.":[52],"As":[53],"different":[54],"categories":[56,115,191],"convey":[57],"distinct":[58],"meanings,":[59],"critical":[64],"for":[65],"understanding":[67,208],"tasks":[68],"such":[69],"as":[70],"summarization":[72],"shooting":[74],"environment":[75],"comprehension.":[76],"To":[77,164],"bridge":[78],"gap":[80],"between":[81],"OCR":[83],"understanding,":[85],"we":[86,128],"explore":[87],"study":[89,198],"in":[94,206],"this":[95,170,215],"work.":[96],"We":[97,139],"first":[98,173],"optimize":[99],"annotation":[102,134,218],"BOVText,":[105],"bilingual,":[107],"open-world":[108],"dataset,":[111],"expanding":[113],"defining":[117],"five":[118],"clear":[119],"categories:":[120],"scene,":[121],"subtitle,":[122],"title,":[123],"logo,":[124],"other.":[126],"Additionally,":[127],"rectify":[129],"original":[131],"unreasonable":[132],"based":[135],"these":[137],"definitions.":[138],"also":[140],"propose":[141],"Video-level":[143],"Text":[144],"Layout":[145],"model":[146],"(VTLayout)":[147],"address":[149],"problem,":[152],"fuses":[154],"textual,":[155],"visual,":[156],"spatial-temporal":[158],"embedding":[159],"trajectories.":[163],"best":[166],"our":[168],"knowledge,":[169],"method":[174,183],"tackle":[176],"level.":[181],"Our":[182,217],"outperforms":[184],"image-level":[185],"methods":[187],"across":[188],"all":[189],"exhibits":[193],"faster":[194],"inference":[195],"speed.":[196],"This":[197],"underscores":[199],"significance":[201],"offers":[210],"an":[211],"effective":[212],"solution":[213],"challenge.":[216],"available":[220],"at":[221],"https://github.com/TencentARC/VTLayout.":[222]},"counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
