{"id":"https://openalex.org/W4385768059","doi":"https://doi.org/10.24963/ijcai.2023/585","title":"Fast-StrucTexT: An Efficient Hourglass Transformer with Modality-guided Dynamic Token Merge for Document Understanding","display_name":"Fast-StrucTexT: An Efficient Hourglass Transformer with Modality-guided Dynamic Token Merge for Document Understanding","publication_year":2023,"publication_date":"2023-08-01","ids":{"openalex":"https://openalex.org/W4385768059","doi":"https://doi.org/10.24963/ijcai.2023/585"},"language":"en","primary_location":{"id":"doi:10.24963/ijcai.2023/585","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2023/585","pdf_url":"https://www.ijcai.org/proceedings/2023/0585.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-Second International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.ijcai.org/proceedings/2023/0585.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5056275425","display_name":"Mingliang Zhai","orcid":"https://orcid.org/0000-0001-5475-9221"},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]},{"id":"https://openalex.org/I98301712","display_name":"Baidu (China)","ror":"https://ror.org/03vs3wt56","country_code":"CN","type":"company","lineage":["https://openalex.org/I98301712"]},{"id":"https://openalex.org/I4210127487","display_name":"Vision Technology (United States)","ror":"https://ror.org/03gmxkp43","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127487"]}],"countries":["CN","US"],"is_corresponding":true,"raw_author_name":"Mingliang Zhai","raw_affiliation_strings":["Beijing Key Laboratory of Intelligent Information Technology, Beijing Institute of Technology, China","Department of Computer Vision Technology (VIS), Baidu Inc"],"affiliations":[{"raw_affiliation_string":"Beijing Key Laboratory of Intelligent Information Technology, Beijing Institute of Technology, China","institution_ids":["https://openalex.org/I125839683"]},{"raw_affiliation_string":"Department of Computer Vision Technology (VIS), Baidu Inc","institution_ids":["https://openalex.org/I98301712","https://openalex.org/I4210127487"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100413222","display_name":"Yulin Li","orcid":"https://orcid.org/0000-0001-6907-5594"},"institutions":[{"id":"https://openalex.org/I4210127487","display_name":"Vision Technology (United States)","ror":"https://ror.org/03gmxkp43","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127487"]},{"id":"https://openalex.org/I98301712","display_name":"Baidu (China)","ror":"https://ror.org/03vs3wt56","country_code":"CN","type":"company","lineage":["https://openalex.org/I98301712"]}],"countries":["CN","US"],"is_corresponding":false,"raw_author_name":"Yulin Li","raw_affiliation_strings":["Department of Computer Vision Technology (VIS), Baidu Inc"],"affiliations":[{"raw_affiliation_string":"Department of Computer Vision Technology (VIS), Baidu Inc","institution_ids":["https://openalex.org/I98301712","https://openalex.org/I4210127487"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113633885","display_name":"Xiameng Qin","orcid":"https://orcid.org/0000-0002-6022-5952"},"institutions":[{"id":"https://openalex.org/I98301712","display_name":"Baidu (China)","ror":"https://ror.org/03vs3wt56","country_code":"CN","type":"company","lineage":["https://openalex.org/I98301712"]},{"id":"https://openalex.org/I4210127487","display_name":"Vision Technology (United States)","ror":"https://ror.org/03gmxkp43","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127487"]}],"countries":["CN","US"],"is_corresponding":false,"raw_author_name":"Xiameng Qin","raw_affiliation_strings":["Department of Computer Vision Technology (VIS), Baidu Inc"],"affiliations":[{"raw_affiliation_string":"Department of Computer Vision Technology (VIS), Baidu Inc","institution_ids":["https://openalex.org/I98301712","https://openalex.org/I4210127487"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101205183","display_name":"Chen Yi","orcid":"https://orcid.org/0009-0001-1381-9395"},"institutions":[{"id":"https://openalex.org/I98301712","display_name":"Baidu (China)","ror":"https://ror.org/03vs3wt56","country_code":"CN","type":"company","lineage":["https://openalex.org/I98301712"]},{"id":"https://openalex.org/I4210127487","display_name":"Vision Technology (United States)","ror":"https://ror.org/03gmxkp43","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127487"]}],"countries":["CN","US"],"is_corresponding":false,"raw_author_name":"Chen Yi","raw_affiliation_strings":["Department of Computer Vision Technology (VIS), Baidu Inc"],"affiliations":[{"raw_affiliation_string":"Department of Computer Vision Technology (VIS), Baidu Inc","institution_ids":["https://openalex.org/I98301712","https://openalex.org/I4210127487"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100570870","display_name":"Qunyi Xie","orcid":null},"institutions":[{"id":"https://openalex.org/I4210127487","display_name":"Vision Technology (United States)","ror":"https://ror.org/03gmxkp43","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127487"]},{"id":"https://openalex.org/I98301712","display_name":"Baidu (China)","ror":"https://ror.org/03vs3wt56","country_code":"CN","type":"company","lineage":["https://openalex.org/I98301712"]}],"countries":["CN","US"],"is_corresponding":false,"raw_author_name":"Qunyi Xie","raw_affiliation_strings":["Department of Computer Vision Technology (VIS), Baidu Inc"],"affiliations":[{"raw_affiliation_string":"Department of Computer Vision Technology (VIS), Baidu Inc","institution_ids":["https://openalex.org/I98301712","https://openalex.org/I4210127487"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056247902","display_name":"Chengquan Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210127487","display_name":"Vision Technology (United States)","ror":"https://ror.org/03gmxkp43","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127487"]},{"id":"https://openalex.org/I98301712","display_name":"Baidu (China)","ror":"https://ror.org/03vs3wt56","country_code":"CN","type":"company","lineage":["https://openalex.org/I98301712"]}],"countries":["CN","US"],"is_corresponding":false,"raw_author_name":"Chengquan Zhang","raw_affiliation_strings":["Department of Computer Vision Technology (VIS), Baidu Inc"],"affiliations":[{"raw_affiliation_string":"Department of Computer Vision Technology (VIS), Baidu Inc","institution_ids":["https://openalex.org/I98301712","https://openalex.org/I4210127487"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008391873","display_name":"Kun Yao","orcid":"https://orcid.org/0000-0003-2032-7441"},"institutions":[{"id":"https://openalex.org/I4210127487","display_name":"Vision Technology (United States)","ror":"https://ror.org/03gmxkp43","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127487"]},{"id":"https://openalex.org/I98301712","display_name":"Baidu (China)","ror":"https://ror.org/03vs3wt56","country_code":"CN","type":"company","lineage":["https://openalex.org/I98301712"]}],"countries":["CN","US"],"is_corresponding":false,"raw_author_name":"Kun Yao","raw_affiliation_strings":["Department of Computer Vision Technology (VIS), Baidu Inc"],"affiliations":[{"raw_affiliation_string":"Department of Computer Vision Technology (VIS), Baidu Inc","institution_ids":["https://openalex.org/I98301712","https://openalex.org/I4210127487"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071656520","display_name":"Yuwei Wu","orcid":"https://orcid.org/0000-0001-6300-6336"},"institutions":[{"id":"https://openalex.org/I98301712","display_name":"Baidu (China)","ror":"https://ror.org/03vs3wt56","country_code":"CN","type":"company","lineage":["https://openalex.org/I98301712"]},{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]},{"id":"https://openalex.org/I4388482657","display_name":"Shenzhen MSU-BIT University","ror":"https://ror.org/02q963474","country_code":null,"type":"education","lineage":["https://openalex.org/I4388482657"]},{"id":"https://openalex.org/I180726961","display_name":"Shenzhen University","ror":"https://ror.org/01vy4gh70","country_code":"CN","type":"education","lineage":["https://openalex.org/I180726961"]},{"id":"https://openalex.org/I4210127487","display_name":"Vision Technology (United States)","ror":"https://ror.org/03gmxkp43","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127487"]}],"countries":["CN","US"],"is_corresponding":false,"raw_author_name":"Yuwei Wu","raw_affiliation_strings":["Beijing Key Laboratory of Intelligent Information Technology, Beijing Institute of Technology, China","Guangdong Laboratory of Machine Perception and Intelligent Computing, Shenzhen MSU-BIT University, China","Department of Computer Vision Technology (VIS), Baidu Inc"],"affiliations":[{"raw_affiliation_string":"Beijing Key Laboratory of Intelligent Information Technology, Beijing Institute of Technology, China","institution_ids":["https://openalex.org/I125839683"]},{"raw_affiliation_string":"Guangdong Laboratory of Machine Perception and Intelligent Computing, Shenzhen MSU-BIT University, China","institution_ids":["https://openalex.org/I180726961","https://openalex.org/I4388482657"]},{"raw_affiliation_string":"Department of Computer Vision Technology (VIS), Baidu Inc","institution_ids":["https://openalex.org/I98301712","https://openalex.org/I4210127487"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100731042","display_name":"Yunde Jia","orcid":"https://orcid.org/0000-0003-1900-8945"},"institutions":[{"id":"https://openalex.org/I4388482657","display_name":"Shenzhen MSU-BIT University","ror":"https://ror.org/02q963474","country_code":null,"type":"education","lineage":["https://openalex.org/I4388482657"]},{"id":"https://openalex.org/I180726961","display_name":"Shenzhen University","ror":"https://ror.org/01vy4gh70","country_code":"CN","type":"education","lineage":["https://openalex.org/I180726961"]},{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yunde Jia","raw_affiliation_strings":["Beijing Key Laboratory of Intelligent Information Technology, Beijing Institute of Technology, China","Guangdong Laboratory of Machine Perception and Intelligent Computing, Shenzhen MSU-BIT University, China"],"affiliations":[{"raw_affiliation_string":"Beijing Key Laboratory of Intelligent Information Technology, Beijing Institute of Technology, China","institution_ids":["https://openalex.org/I125839683"]},{"raw_affiliation_string":"Guangdong Laboratory of Machine Perception and Intelligent Computing, Shenzhen MSU-BIT University, China","institution_ids":["https://openalex.org/I180726961","https://openalex.org/I4388482657"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5056275425"],"corresponding_institution_ids":["https://openalex.org/I125839683","https://openalex.org/I4210127487","https://openalex.org/I98301712"],"apc_list":null,"apc_paid":null,"fwci":0.8522,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.75373876,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"5269","last_page":"5277"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9934999942779541,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8116434812545776},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.7289520502090454},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.49412062764167786},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.48914483189582825},{"id":"https://openalex.org/keywords/hourglass","display_name":"Hourglass","score":0.46113717555999756},{"id":"https://openalex.org/keywords/granularity","display_name":"Granularity","score":0.44551894068717957},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4253089427947998},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.41618967056274414},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.33891552686691284},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.11516919732093811}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8116434812545776},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.7289520502090454},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.49412062764167786},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.48914483189582825},{"id":"https://openalex.org/C127532173","wikidata":"https://www.wikidata.org/wiki/Q179904","display_name":"Hourglass","level":2,"score":0.46113717555999756},{"id":"https://openalex.org/C177774035","wikidata":"https://www.wikidata.org/wiki/Q1246948","display_name":"Granularity","level":2,"score":0.44551894068717957},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4253089427947998},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.41618967056274414},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.33891552686691284},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.11516919732093811},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.24963/ijcai.2023/585","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2023/585","pdf_url":"https://www.ijcai.org/proceedings/2023/0585.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-Second International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.24963/ijcai.2023/585","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2023/585","pdf_url":"https://www.ijcai.org/proceedings/2023/0585.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-Second International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1231421488","display_name":null,"funder_award_id":"under","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2087396116","display_name":null,"funder_award_id":"China","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3317480652","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4365746725","display_name":null,"funder_award_id":"62176021","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5994120800","display_name":null,"funder_award_id":"Natural","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6632724818","display_name":null,"funder_award_id":"2172041","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7033253288","display_name":null,"funder_award_id":"Grants","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7255922863","display_name":null,"funder_award_id":"62172041","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8208342437","display_name":null,"funder_award_id":"1 and","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8863666567","display_name":null,"funder_award_id":"and No.","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4385768059.pdf"},"referenced_works_count":43,"referenced_works":["https://openalex.org/W2194775991","https://openalex.org/W2896457183","https://openalex.org/W2953273646","https://openalex.org/W2962772269","https://openalex.org/W2965373594","https://openalex.org/W2986619406","https://openalex.org/W3000758063","https://openalex.org/W3015468748","https://openalex.org/W3033188311","https://openalex.org/W3093218477","https://openalex.org/W3104953317","https://openalex.org/W3105966348","https://openalex.org/W3132296545","https://openalex.org/W3173306993","https://openalex.org/W3173325518","https://openalex.org/W3176664887","https://openalex.org/W3176851559","https://openalex.org/W3182680257","https://openalex.org/W3190448953","https://openalex.org/W3190965961","https://openalex.org/W3205981739","https://openalex.org/W3207806388","https://openalex.org/W3214042621","https://openalex.org/W4221167941","https://openalex.org/W4239072543","https://openalex.org/W4285105124","https://openalex.org/W4285241172","https://openalex.org/W4286750734","https://openalex.org/W4287115696","https://openalex.org/W4287551929","https://openalex.org/W4297853071","https://openalex.org/W4302275239","https://openalex.org/W4304013646","https://openalex.org/W4306311936","https://openalex.org/W4309386164","https://openalex.org/W4312769570","https://openalex.org/W4312849330","https://openalex.org/W4312960790","https://openalex.org/W4312980231","https://openalex.org/W4313170858","https://openalex.org/W4319300012","https://openalex.org/W4323654151","https://openalex.org/W4386221015"],"related_works":["https://openalex.org/W2008001194","https://openalex.org/W2512390310","https://openalex.org/W2059645356","https://openalex.org/W3141841835","https://openalex.org/W2380057626","https://openalex.org/W2033695776","https://openalex.org/W2354533751","https://openalex.org/W2755014831","https://openalex.org/W1509096452","https://openalex.org/W4304700937"],"abstract_inverted_index":{"Transformers":[0],"achieve":[1,59],"promising":[2],"performance":[3,167],"in":[4,44,148],"document":[5,90],"understanding":[6],"because":[7],"of":[8],"their":[9],"high":[10],"effectiveness":[11],"and":[12,49,55,65,108,127,157,168],"still":[13],"suffer":[14],"from":[15],"quadratic":[16],"computational":[17],"complexity":[18],"dependency":[19],"on":[20,51,79,154],"the":[21,41,69,80,103,130,165,175],"sequence":[22],"length.":[23],"General":[24],"efficient":[25,75],"transformers":[26],"are":[27,37],"challenging":[28],"to":[29,33,39,58,101,123,141],"be":[30],"directly":[31],"adapted":[32],"model":[34,104,163],"document.":[35],"They":[36],"unable":[38],"handle":[40],"layout":[42],"representation":[43,107],"documents,":[45],"e.g.":[46],"word,":[47],"line":[48],"paragraph,":[50],"different":[52],"granularity":[53],"levels":[54],"seem":[56],"hard":[57],"a":[60,95,115,149],"good":[61],"trade-off":[62],"between":[63],"efficiency":[64],"performance.":[66],"To":[67],"tackle":[68],"concerns,":[70],"we":[71,93,113],"propose":[72],"Fast-StrucTexT,":[73],"an":[74,84],"multi-modal":[76,116,125],"framework":[77],"based":[78],"StrucTexT":[81],"algorithm":[82],"with":[83,145],"hourglass":[85],"transformer":[86],"architecture,":[87],"for":[88],"visual":[89],"understanding.":[91],"Specifically,":[92],"design":[94],"modality-guided":[96],"dynamic":[97],"token":[98,131],"merging":[99],"block":[100],"make":[102],"learn":[105],"multi-granularity":[106],"prunes":[109],"redundant":[110],"tokens.":[111],"Additionally,":[112],"present":[114],"interaction":[117],"module":[118],"called":[119],"Symmetry":[120],"Cross-Attention":[121],"(SCA)":[122],"consider":[124],"fusion":[126],"efficiently":[128],"guide":[129],"mergence.":[132],"The":[133],"SCA":[134],"allows":[135],"one":[136],"modality":[137,147],"input":[138],"as":[139],"query":[140],"calculate":[142],"cross":[143],"attention":[144],"another":[146],"dual":[150],"phase.":[151],"Extensive":[152],"experiments":[153],"FUNSD,":[155],"SROIE,":[156],"CORD":[158],"datasets":[159],"demonstrate":[160],"that":[161],"our":[162],"achieves":[164],"state-of-the-art":[166,176],"almost":[169],"1.9x":[170],"faster":[171],"inference":[172],"time":[173],"than":[174],"methods.":[177]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":1}],"updated_date":"2026-03-18T14:38:29.013473","created_date":"2025-10-10T00:00:00"}
