{"id":"https://openalex.org/W4391936133","doi":"https://doi.org/10.1109/tcsvt.2024.3366935","title":"Hierarchical Multi-Modal Prompting Transformer for Multi-Modal Long Document Classification","display_name":"Hierarchical Multi-Modal Prompting Transformer for Multi-Modal Long Document Classification","publication_year":2024,"publication_date":"2024-02-19","ids":{"openalex":"https://openalex.org/W4391936133","doi":"https://doi.org/10.1109/tcsvt.2024.3366935"},"language":"en","primary_location":{"id":"doi:10.1109/tcsvt.2024.3366935","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2024.3366935","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100726340","display_name":"Tengfei Liu","orcid":"https://orcid.org/0000-0002-2739-5220"},"institutions":[{"id":"https://openalex.org/I37796252","display_name":"Beijing University of Technology","ror":"https://ror.org/037b1pp87","country_code":"CN","type":"education","lineage":["https://openalex.org/I37796252"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Tengfei Liu","raw_affiliation_strings":["Beijing Key Laboratory of Multimedia and Intelligent Software Technology, Beijing Institute of Artificial Intelligence, Faculty of Information Technology, Beijing University of Technology, Beijing, China","Beijing Institute of Artificial Intelligence, Faculty of Information Technology, Beijing Key Laboratory of Multimedia and Intelligent Software Technology, Beijing University of Technology, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beijing Key Laboratory of Multimedia and Intelligent Software Technology, Beijing Institute of Artificial Intelligence, Faculty of Information Technology, Beijing University of Technology, Beijing, China","institution_ids":["https://openalex.org/I37796252"]},{"raw_affiliation_string":"Beijing Institute of Artificial Intelligence, Faculty of Information Technology, Beijing Key Laboratory of Multimedia and Intelligent Software Technology, Beijing University of Technology, Beijing, China","institution_ids":["https://openalex.org/I37796252"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027329007","display_name":"Yongli Hu","orcid":"https://orcid.org/0000-0003-0440-438X"},"institutions":[{"id":"https://openalex.org/I37796252","display_name":"Beijing University of Technology","ror":"https://ror.org/037b1pp87","country_code":"CN","type":"education","lineage":["https://openalex.org/I37796252"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yongli Hu","raw_affiliation_strings":["Beijing Key Laboratory of Multimedia and Intelligent Software Technology, Beijing Institute of Artificial Intelligence, Faculty of Information Technology, Beijing University of Technology, Beijing, China","Beijing Institute of Artificial Intelligence, Faculty of Information Technology, Beijing Key Laboratory of Multimedia and Intelligent Software Technology, Beijing University of Technology, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beijing Key Laboratory of Multimedia and Intelligent Software Technology, Beijing Institute of Artificial Intelligence, Faculty of Information Technology, Beijing University of Technology, Beijing, China","institution_ids":["https://openalex.org/I37796252"]},{"raw_affiliation_string":"Beijing Institute of Artificial Intelligence, Faculty of Information Technology, Beijing Key Laboratory of Multimedia and Intelligent Software Technology, Beijing University of Technology, Beijing, China","institution_ids":["https://openalex.org/I37796252"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015817857","display_name":"Junbin Gao","orcid":"https://orcid.org/0000-0001-9803-0256"},"institutions":[{"id":"https://openalex.org/I129604602","display_name":"The University of Sydney","ror":"https://ror.org/0384j8v12","country_code":"AU","type":"education","lineage":["https://openalex.org/I129604602"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Junbin Gao","raw_affiliation_strings":["Discipline of Business Analytics, The University of Sydney Business School, The University of Sydney, Camperdown, NSW, Australia"],"affiliations":[{"raw_affiliation_string":"Discipline of Business Analytics, The University of Sydney Business School, The University of Sydney, Camperdown, NSW, Australia","institution_ids":["https://openalex.org/I129604602"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082380547","display_name":"Yanfeng Sun","orcid":"https://orcid.org/0000-0002-0872-384X"},"institutions":[{"id":"https://openalex.org/I37796252","display_name":"Beijing University of Technology","ror":"https://ror.org/037b1pp87","country_code":"CN","type":"education","lineage":["https://openalex.org/I37796252"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yanfeng Sun","raw_affiliation_strings":["Beijing Key Laboratory of Multimedia and Intelligent Software Technology, Beijing Institute of Artificial Intelligence, Faculty of Information Technology, Beijing University of Technology, Beijing, China","Beijing Institute of Artificial Intelligence, Faculty of Information Technology, Beijing Key Laboratory of Multimedia and Intelligent Software Technology, Beijing University of Technology, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beijing Key Laboratory of Multimedia and Intelligent Software Technology, Beijing Institute of Artificial Intelligence, Faculty of Information Technology, Beijing University of Technology, Beijing, China","institution_ids":["https://openalex.org/I37796252"]},{"raw_affiliation_string":"Beijing Institute of Artificial Intelligence, Faculty of Information Technology, Beijing Key Laboratory of Multimedia and Intelligent Software Technology, Beijing University of Technology, Beijing, China","institution_ids":["https://openalex.org/I37796252"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5020527092","display_name":"Baocai Yin","orcid":"https://orcid.org/0000-0003-3121-1823"},"institutions":[{"id":"https://openalex.org/I37796252","display_name":"Beijing University of Technology","ror":"https://ror.org/037b1pp87","country_code":"CN","type":"education","lineage":["https://openalex.org/I37796252"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Baocai Yin","raw_affiliation_strings":["Beijing Key Laboratory of Multimedia and Intelligent Software Technology, Beijing Institute of Artificial Intelligence, Faculty of Information Technology, Beijing University of Technology, Beijing, China","Beijing Institute of Artificial Intelligence, Faculty of Information Technology, Beijing Key Laboratory of Multimedia and Intelligent Software Technology, Beijing University of Technology, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beijing Key Laboratory of Multimedia and Intelligent Software Technology, Beijing Institute of Artificial Intelligence, Faculty of Information Technology, Beijing University of Technology, Beijing, China","institution_ids":["https://openalex.org/I37796252"]},{"raw_affiliation_string":"Beijing Institute of Artificial Intelligence, Faculty of Information Technology, Beijing Key Laboratory of Multimedia and Intelligent Software Technology, Beijing University of Technology, Beijing, China","institution_ids":["https://openalex.org/I37796252"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100726340"],"corresponding_institution_ids":["https://openalex.org/I37796252"],"apc_list":null,"apc_paid":null,"fwci":2.4326,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.8966375,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":"34","issue":"7","first_page":"6376","last_page":"6390"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.7552802562713623},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6310728788375854},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.41125887632369995},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3319370448589325}],"concepts":[{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.7552802562713623},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6310728788375854},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.41125887632369995},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3319370448589325},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcsvt.2024.3366935","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2024.3366935","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G6077705658","display_name":null,"funder_award_id":"2021ZD0111902","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G6212863906","display_name":null,"funder_award_id":"U21B2038","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7744321187","display_name":null,"funder_award_id":"U19B2039","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":66,"referenced_works":["https://openalex.org/W1832693441","https://openalex.org/W2296448531","https://openalex.org/W2470673105","https://openalex.org/W2619206542","https://openalex.org/W2739996966","https://openalex.org/W2896457183","https://openalex.org/W2905562398","https://openalex.org/W2945542139","https://openalex.org/W2951529591","https://openalex.org/W2963626623","https://openalex.org/W2964010806","https://openalex.org/W2964346351","https://openalex.org/W2973131617","https://openalex.org/W2997591727","https://openalex.org/W2998356391","https://openalex.org/W3008736151","https://openalex.org/W3014611590","https://openalex.org/W3015468748","https://openalex.org/W3033529678","https://openalex.org/W3043840704","https://openalex.org/W3091588028","https://openalex.org/W3093051361","https://openalex.org/W3101023724","https://openalex.org/W3101295217","https://openalex.org/W3161820423","https://openalex.org/W3174794493","https://openalex.org/W3176443126","https://openalex.org/W3184784418","https://openalex.org/W3203354307","https://openalex.org/W3214432797","https://openalex.org/W3217340782","https://openalex.org/W4205536074","https://openalex.org/W4206430846","https://openalex.org/W4221153068","https://openalex.org/W4226391640","https://openalex.org/W4283725425","https://openalex.org/W4285186657","https://openalex.org/W4306313147","https://openalex.org/W4309864938","https://openalex.org/W4310999936","https://openalex.org/W4312651322","https://openalex.org/W4312758889","https://openalex.org/W4312946813","https://openalex.org/W4321021726","https://openalex.org/W4321482228","https://openalex.org/W4362653417","https://openalex.org/W4376607994","https://openalex.org/W4381198646","https://openalex.org/W4385245566","https://openalex.org/W4386047824","https://openalex.org/W4386071468","https://openalex.org/W4386071547","https://openalex.org/W6735377749","https://openalex.org/W6752554729","https://openalex.org/W6771626834","https://openalex.org/W6775188310","https://openalex.org/W6776048684","https://openalex.org/W6779163297","https://openalex.org/W6781533629","https://openalex.org/W6789753369","https://openalex.org/W6791353385","https://openalex.org/W6797613833","https://openalex.org/W6810168380","https://openalex.org/W6846004651","https://openalex.org/W6847132884","https://openalex.org/W6847476102"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052","https://openalex.org/W2033914206","https://openalex.org/W2042327336"],"abstract_inverted_index":{"In":[0],"the":[1,31,47,99,106,127,153,167],"context":[2],"of":[3,41,49,140],"long":[4,182],"document":[5,183],"classification":[6,197],"(LDC),":[7],"effectively":[8],"utilizing":[9],"multi-modal":[10,102,122,181],"information":[11,158],"encompassing":[12,53],"texts":[13],"and":[14,44,62,69,75,101,108,132,147,163,177,195],"images":[15,50,61,74],"within":[16],"these":[17,82],"documents":[18],"has":[19],"not":[20],"received":[21],"adequate":[22],"attention.":[23],"This":[24],"task":[25],"showcases":[26],"several":[27],"notable":[28],"characteristics.":[29],"Firstly,":[30],"text":[32,76],"possesses":[33],"an":[34,119],"implicit":[35],"or":[36],"explicit":[37],"hierarchical":[38],"structure":[39],"consisting":[40],"sections,":[42],"sentences,":[43],"words.":[45],"Secondly,":[46],"distribution":[48],"is":[51],"dispersed,":[52],"various":[54],"types":[55,139],"such":[56],"as":[57,150],"highly":[58],"relevant":[59],"topic":[60],"loosely":[63],"related":[64],"reference":[65],"images.":[66,133],"Lastly,":[67],"intricate":[68],"diverse":[70],"relationships":[71],"exist":[72],"between":[73,114,130],"at":[77,104],"different":[78,138,161],"levels.":[79],"To":[80,165],"address":[81],"challenges,":[83],"we":[84,117,135,170],"propose":[85],"a":[86],"novel":[87],"approach":[88],"called":[89],"Hierarchical":[90],"Multi-modal":[91],"Prompting":[92],"Transformer":[93],"(HMPT).":[94],"Our":[95],"proposed":[96],"method":[97,191],"constructs":[98],"uni-modal":[100],"transformers":[103],"both":[105],"section":[107],"sentence":[109],"levels,":[110],"facilitating":[111],"effective":[112],"interaction":[113,159],"features.":[115],"Notably,":[116],"design":[118],"adaptive":[120],"multi-scale":[121],"transformer":[123],"tailored":[124],"to":[125],"capture":[126],"multi-granularity":[128],"correlations":[129],"sentences":[131],"Additionally,":[134],"introduce":[136],"three":[137],"shared":[141,144],"prompts,":[142,149],"i.e.,":[143],"section,":[145],"sentence,":[146],"image":[148],"bridges":[151],"connecting":[152],"isolated":[154],"transformers,":[155],"enabling":[156],"seamless":[157],"across":[160],"levels":[162],"modalities.":[164],"validate":[166],"model":[168],"performance,":[169],"conducted":[171],"experiments":[172],"on":[173],"two":[174,178],"newly":[175],"created":[176],"publicly":[179],"available":[180],"datasets.":[184],"The":[185],"obtained":[186],"results":[187],"show":[188],"that":[189],"our":[190],"outperforms":[192],"state-of-the-art":[193],"single-modality":[194],"multi-modality":[196],"methods.":[198]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":3}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
