{"id":"https://openalex.org/W4399657603","doi":"https://doi.org/10.48550/arxiv.2406.08226","title":"DistilDoc: Knowledge Distillation for Visually-Rich Document Applications","display_name":"DistilDoc: Knowledge Distillation for Visually-Rich Document Applications","publication_year":2024,"publication_date":"2024-06-12","ids":{"openalex":"https://openalex.org/W4399657603","doi":"https://doi.org/10.48550/arxiv.2406.08226"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2406.08226","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2406.08226","pdf_url":"https://arxiv.org/pdf/2406.08226","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2406.08226","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5021994697","display_name":"Jordy Van Landeghem","orcid":"https://orcid.org/0000-0002-9838-3024"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Van Landeghem, Jordy","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101714544","display_name":"Subhajit Maity","orcid":"https://orcid.org/0000-0002-0735-8406"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Maity, Subhajit","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100606339","display_name":"Ayan Banerjee","orcid":"https://orcid.org/0000-0001-6529-1644"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Banerjee, Ayan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077783791","display_name":"Matthew B. Blaschko","orcid":"https://orcid.org/0000-0002-2640-181X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Blaschko, Matthew","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075796989","display_name":"Marie\u2010Francine Moens","orcid":"https://orcid.org/0000-0002-3732-9323"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Moens, Marie-Francine","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065907624","display_name":"Josep Llad\u00f3s","orcid":"https://orcid.org/0000-0002-4533-4739"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Llad\u00f3s, Josep","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5060424729","display_name":"Sanket Biswas","orcid":"https://orcid.org/0000-0001-6648-8270"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Biswas, Sanket","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9524999856948853,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9524999856948853,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12377","display_name":"Digital Humanities and Scholarship","score":0.911300003528595,"subfield":{"id":"https://openalex.org/subfields/1208","display_name":"Literature and Literary Theory"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/distillation","display_name":"Distillation","score":0.6014856696128845},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5598618984222412},{"id":"https://openalex.org/keywords/process-engineering","display_name":"Process engineering","score":0.3819448947906494},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.36065131425857544},{"id":"https://openalex.org/keywords/chemistry","display_name":"Chemistry","score":0.22187581658363342},{"id":"https://openalex.org/keywords/chromatography","display_name":"Chromatography","score":0.20300686359405518},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.13508903980255127}],"concepts":[{"id":"https://openalex.org/C204030448","wikidata":"https://www.wikidata.org/wiki/Q101017","display_name":"Distillation","level":2,"score":0.6014856696128845},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5598618984222412},{"id":"https://openalex.org/C21880701","wikidata":"https://www.wikidata.org/wiki/Q2144042","display_name":"Process engineering","level":1,"score":0.3819448947906494},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.36065131425857544},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.22187581658363342},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.20300686359405518},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.13508903980255127}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2406.08226","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2406.08226","pdf_url":"https://arxiv.org/pdf/2406.08226","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2406.08226","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2406.08226","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2406.08226","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2406.08226","pdf_url":"https://arxiv.org/pdf/2406.08226","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2262748287","display_name":null,"funder_award_id":"501100011033","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G3480869486","display_name":null,"funder_award_id":"13039","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G451917667","display_name":null,"funder_award_id":"13039/501100011033","funder_id":"https://openalex.org/F4320335322","funder_display_name":"European Regional Development Fund"},{"id":"https://openalex.org/G5852091828","display_name":null,"funder_award_id":"13039/501100011033","funder_id":"https://openalex.org/F4320334830","funder_display_name":"Ag\u00e8ncia de Gesti\u00f3 d'Ajuts Universitaris i de Recerca"},{"id":"https://openalex.org/G5967599077","display_name":null,"funder_award_id":"501100011033","funder_id":"https://openalex.org/F4320335322","funder_display_name":"European Regional Development Fund"},{"id":"https://openalex.org/G7266728691","display_name":null,"funder_award_id":"13039/501100011033","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G7364240373","display_name":null,"funder_award_id":"2023 FI-3","funder_id":"https://openalex.org/F4320334830","funder_display_name":"Ag\u00e8ncia de Gesti\u00f3 d'Ajuts Universitaris i de Recerca"},{"id":"https://openalex.org/G8243135175","display_name":null,"funder_award_id":"HBC.2019.2604","funder_id":"https://openalex.org/F4320313460","funder_display_name":"Agentschap Innoveren en Ondernemen"}],"funders":[{"id":"https://openalex.org/F4320313460","display_name":"Agentschap Innoveren en Ondernemen","ror":"https://ror.org/032xdry56"},{"id":"https://openalex.org/F4320334830","display_name":"Ag\u00e8ncia de Gesti\u00f3 d'Ajuts Universitaris i de Recerca","ror":"https://ror.org/01n4pqe45"},{"id":"https://openalex.org/F4320335322","display_name":"European Regional Development Fund","ror":"https://ror.org/00k4n6c32"},{"id":"https://openalex.org/F4320335598","display_name":"Agencia Estatal de Investigaci\u00f3n","ror":null}],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4399657603.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W3085764877","https://openalex.org/W2514414740","https://openalex.org/W2377414158","https://openalex.org/W3199615306","https://openalex.org/W77207468","https://openalex.org/W3212781313","https://openalex.org/W4307725381","https://openalex.org/W124863575","https://openalex.org/W3203147184","https://openalex.org/W2037691954"],"abstract_inverted_index":{"This":[0],"work":[1],"explores":[2],"knowledge":[3,76,98,152],"distillation":[4],"(KD)":[5],"for":[6,50,74],"visually-rich":[7],"document":[8,13,18,56,140,172],"(VRD)":[9],"applications":[10],"such":[11],"as":[12],"layout":[14,173],"analysis":[15],"(DLA)":[16],"and":[17,30,78,87,100,130],"image":[19],"classification":[20],"(DIC).":[21],"While":[22],"VRD":[23],"research":[24],"is":[25],"dependent":[26],"on":[27,55,137],"increasingly":[28],"sophisticated":[29],"cumbersome":[31],"models,":[32],"the":[33,96,131,161],"field":[34],"has":[35],"neglected":[36],"to":[37,77,126,157,163,167],"study":[38,93],"efficiency":[39],"via":[40],"model":[41],"compression.":[42],"Here,":[43],"we":[44,121],"design":[45,122],"a":[46,149],"KD":[47,70],"experimentation":[48],"methodology":[49],"more":[51,170],"lean,":[52],"performant":[53],"models":[54,136],"understanding":[57],"(DU)":[58],"tasks":[59],"that":[60,102],"are":[61],"integral":[62],"within":[63],"larger":[64],"task":[65,124],"pipelines.":[66],"We":[67,92],"carefully":[68],"selected":[69],"strategies":[71],"(response-based,":[72],"feature-based)":[73],"distilling":[75],"from":[79],"backbones":[80],"with":[81,110],"different":[82],"architectures":[83],"(ResNet,":[84],"ViT,":[85],"DiT)":[86],"capacities":[88],"(base,":[89],"small,":[90],"tiny).":[91],"what":[94],"affects":[95],"teacher-student":[97],"gap":[99],"find":[101],"some":[103],"methods":[104],"(tuned":[105],"vanilla":[106],"KD,":[107],"MSE,":[108],"SimKD":[109],"an":[111],"apt":[112],"projector)":[113],"can":[114],"consistently":[115],"outperform":[116],"supervised":[117],"student":[118],"training.":[119],"Furthermore,":[120],"downstream":[123,158],"setups":[125],"evaluate":[127],"covariate":[128],"shift":[129],"robustness":[132],"of":[133],"distilled":[134],"DLA":[135],"zero-shot":[138],"layout-aware":[139],"visual":[141],"question":[142],"answering":[143],"(DocVQA).":[144],"DLA-KD":[145],"experiments":[146],"result":[147],"in":[148],"large":[150],"mAP":[151],"gap,":[153],"which":[154],"unpredictably":[155],"translates":[156],"robustness,":[159],"accentuating":[160],"need":[162],"further":[164],"explore":[165],"how":[166],"efficiently":[168],"obtain":[169],"semantic":[171],"awareness.":[174]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
