{"id":"https://openalex.org/W4312937942","doi":"https://doi.org/10.1109/icpr56361.2022.9956301","title":"Multimodal Tree Decoder for Table of Contents Extraction in Document Images","display_name":"Multimodal Tree Decoder for Table of Contents Extraction in Document Images","publication_year":2022,"publication_date":"2022-08-21","ids":{"openalex":"https://openalex.org/W4312937942","doi":"https://doi.org/10.1109/icpr56361.2022.9956301"},"language":"en","primary_location":{"id":"doi:10.1109/icpr56361.2022.9956301","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icpr56361.2022.9956301","pdf_url":null,"source":{"id":"https://openalex.org/S4363607731","display_name":"2022 26th International Conference on Pattern Recognition (ICPR)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 26th International Conference on Pattern Recognition (ICPR)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100703619","display_name":"Pengfei Hu","orcid":"https://orcid.org/0000-0002-7935-886X"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Pengfei Hu","raw_affiliation_strings":["University of Science and Technology of China,National Engineering Research Center of Speech and Language Information Processing,Hefei,P. R. China","National Engineering Research Center of Speech and Language Information Processing, University of Science and Technology of China, Hefei, P. R. China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,National Engineering Research Center of Speech and Language Information Processing,Hefei,P. R. China","institution_ids":["https://openalex.org/I126520041"]},{"raw_affiliation_string":"National Engineering Research Center of Speech and Language Information Processing, University of Science and Technology of China, Hefei, P. R. China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012060181","display_name":"Zhenrong Zhang","orcid":"https://orcid.org/0000-0003-1125-6637"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhenrong Zhang","raw_affiliation_strings":["University of Science and Technology of China,National Engineering Research Center of Speech and Language Information Processing,Hefei,P. R. China","National Engineering Research Center of Speech and Language Information Processing, University of Science and Technology of China, Hefei, P. R. China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,National Engineering Research Center of Speech and Language Information Processing,Hefei,P. R. China","institution_ids":["https://openalex.org/I126520041"]},{"raw_affiliation_string":"National Engineering Research Center of Speech and Language Information Processing, University of Science and Technology of China, Hefei, P. R. China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101760682","display_name":"Jianshu Zhang","orcid":"https://orcid.org/0000-0002-2713-2535"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jianshu Zhang","raw_affiliation_strings":["iFLYTEK Research"],"affiliations":[{"raw_affiliation_string":"iFLYTEK Research","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066595711","display_name":"Jun Du","orcid":"https://orcid.org/0000-0002-2387-0389"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jun Du","raw_affiliation_strings":["University of Science and Technology of China,National Engineering Research Center of Speech and Language Information Processing,Hefei,P. R. China","National Engineering Research Center of Speech and Language Information Processing, University of Science and Technology of China, Hefei, P. R. China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,National Engineering Research Center of Speech and Language Information Processing,Hefei,P. R. China","institution_ids":["https://openalex.org/I126520041"]},{"raw_affiliation_string":"National Engineering Research Center of Speech and Language Information Processing, University of Science and Technology of China, Hefei, P. R. China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101605865","display_name":"Jiajia Wu","orcid":"https://orcid.org/0000-0001-7667-4878"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiajia Wu","raw_affiliation_strings":["iFLYTEK Research"],"affiliations":[{"raw_affiliation_string":"iFLYTEK Research","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100703619"],"corresponding_institution_ids":["https://openalex.org/I126520041"],"apc_list":null,"apc_paid":null,"fwci":0.8428,"has_fulltext":false,"cited_by_count":15,"citation_normalized_percentile":{"value":0.819692,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1756","last_page":"1762"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14339","display_name":"Image Processing and 3D Reconstruction","score":0.9950000047683716,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8580360412597656},{"id":"https://openalex.org/keywords/parsing","display_name":"Parsing","score":0.5826793313026428},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5819944143295288},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.5684654116630554},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.5580230951309204},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5147014260292053},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4946918189525604},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.4715207815170288},{"id":"https://openalex.org/keywords/tree","display_name":"Tree (set theory)","score":0.4654451310634613},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4303273558616638},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.412394255399704},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.40608662366867065},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.32890719175338745}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8580360412597656},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.5826793313026428},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5819944143295288},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.5684654116630554},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5580230951309204},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5147014260292053},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4946918189525604},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.4715207815170288},{"id":"https://openalex.org/C113174947","wikidata":"https://www.wikidata.org/wiki/Q2859736","display_name":"Tree (set theory)","level":2,"score":0.4654451310634613},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4303273558616638},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.412394255399704},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.40608662366867065},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.32890719175338745},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icpr56361.2022.9956301","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icpr56361.2022.9956301","pdf_url":null,"source":{"id":"https://openalex.org/S4363607731","display_name":"2022 26th International Conference on Pattern Recognition (ICPR)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 26th International Conference on Pattern Recognition (ICPR)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.4699999988079071,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":42,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1755397260","https://openalex.org/W1924770834","https://openalex.org/W2041904910","https://openalex.org/W2054908521","https://openalex.org/W2082344717","https://openalex.org/W2106311500","https://openalex.org/W2121316417","https://openalex.org/W2151765755","https://openalex.org/W2194775991","https://openalex.org/W2493109812","https://openalex.org/W2565639579","https://openalex.org/W2623860192","https://openalex.org/W2785634898","https://openalex.org/W2890174976","https://openalex.org/W2896457183","https://openalex.org/W2915774443","https://openalex.org/W2951529591","https://openalex.org/W2963150697","https://openalex.org/W2963263347","https://openalex.org/W2963351448","https://openalex.org/W2982770724","https://openalex.org/W2997154779","https://openalex.org/W3003711898","https://openalex.org/W3035449864","https://openalex.org/W3104953317","https://openalex.org/W3107064625","https://openalex.org/W3120704476","https://openalex.org/W3121286774","https://openalex.org/W3180751782","https://openalex.org/W3194594797","https://openalex.org/W3203055579","https://openalex.org/W4385245566","https://openalex.org/W6631190155","https://openalex.org/W6640212811","https://openalex.org/W6726497184","https://openalex.org/W6733328719","https://openalex.org/W6739901393","https://openalex.org/W6755207826","https://openalex.org/W6760947256","https://openalex.org/W6788913622","https://openalex.org/W6798432804"],"related_works":["https://openalex.org/W3203142394","https://openalex.org/W579810227","https://openalex.org/W2952780262","https://openalex.org/W2979495269","https://openalex.org/W4302615923","https://openalex.org/W2221419418","https://openalex.org/W2086811659","https://openalex.org/W2050340680","https://openalex.org/W1700641177","https://openalex.org/W2953384362"],"abstract_inverted_index":{"Table":[0],"of":[1,9,19,82,116,130,139,173,189,195,201],"contents":[2],"(ToC)":[3],"extraction":[4],"aims":[5],"to":[6,14,43,152],"extract":[7],"headings":[8,45],"different":[10],"levels":[11],"in":[12,66],"documents":[13,81],"better":[15],"understand":[16],"the":[17,20,48,54,98,127,140,143,148,154,158,168,171,198],"outline":[18],"contents,":[21],"which":[22],"can":[23],"be":[24,208],"widely":[25],"used":[26],"for":[27,103,108,136],"document":[28],"understanding":[29],"and":[30,39,46,56,122,133,146,177,191,205],"information":[31,135],"retrieval.":[32],"Existing":[33],"works":[34],"often":[35],"use":[36],"hand-crafted":[37],"features":[38,129],"predefined":[40],"rule-based":[41],"functions":[42],"detect":[44],"resolve":[47],"hierarchical":[49,155],"relationship":[50,156],"between":[51,157],"headings.":[52],"Both":[53],"benchmark":[55,107],"research":[57],"based":[58],"on":[59,197],"deep":[60],"learning":[61],"are":[62,179],"still":[63],"limited.":[64],"Accordingly,":[65],"this":[67],"paper,":[68],"we":[69,90],"first":[70],"introduce":[71],"a":[72,92,106,161],"standard":[73],"dataset,":[74],"HierDoc,":[75],"including":[76],"image":[77],"samples":[78],"from":[79],"650":[80],"scientific":[83],"papers":[84],"with":[85],"their":[86],"content":[87],"labels.":[88],"Then":[89,142],"propose":[91],"novel":[93],"end-to-end":[94],"model":[95,112],"by":[96],"using":[97],"multimodal":[99],"tree":[100],"decoder":[101,163],"(MTD)":[102],"ToC":[104],"as":[105],"HierDoc.":[109,202],"The":[110,124,203],"MTD":[111,183],"is":[113,164],"mainly":[114],"composed":[115],"three":[117],"parts,":[118],"namely":[119],"encoder,":[120],"classifier,":[121],"decoder.":[123],"encoder":[125],"fuses":[126],"multimodality":[128],"vision,":[131],"text,":[132],"layout":[134],"each":[137],"entity":[138],"document.":[141],"classifier":[144],"recognizes":[145],"selects":[147],"heading":[149,159],"entities.":[150],"Next,":[151],"parse":[153],"entities,":[160],"tree-structured":[162],"designed.":[165],"To":[166],"evaluate":[167],"performance,":[169],"both":[170],"metric":[172],"tree-edit-distance":[174],"similarity":[175],"(TEDS)":[176],"F1-Measure":[178,194],"adopted.":[180],"Finally,":[181],"our":[182],"approach":[184],"achieves":[185],"an":[186,192],"average":[187,193],"TEDS":[188],"87.2%":[190],"88.1%":[196],"test":[199],"set":[200],"code":[204],"dataset":[206],"will":[207],"released":[209],"at:":[210],"https://github.com/Pengfei-Hu/MTD.":[211]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":5}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
