{"id":"https://openalex.org/W3182680257","doi":"https://doi.org/10.1109/cvpr46437.2021.00560","title":"SelfDoc: Self-Supervised Document Representation Learning","display_name":"SelfDoc: Self-Supervised Document Representation Learning","publication_year":2021,"publication_date":"2021-06-01","ids":{"openalex":"https://openalex.org/W3182680257","doi":"https://doi.org/10.1109/cvpr46437.2021.00560","mag":"3182680257"},"language":"en","primary_location":{"id":"doi:10.1109/cvpr46437.2021.00560","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cvpr46437.2021.00560","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103167145","display_name":"Peizhao Li","orcid":"https://orcid.org/0000-0002-7593-2473"},"institutions":[{"id":"https://openalex.org/I6902469","display_name":"Brandeis University","ror":"https://ror.org/05abbep66","country_code":"US","type":"education","lineage":["https://openalex.org/I6902469"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Peizhao Li","raw_affiliation_strings":["Brandeis University"],"affiliations":[{"raw_affiliation_string":"Brandeis University","institution_ids":["https://openalex.org/I6902469"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005119482","display_name":"Jiuxiang Gu","orcid":"https://orcid.org/0000-0002-3437-5084"},"institutions":[{"id":"https://openalex.org/I1306409833","display_name":"Adobe Systems (United States)","ror":"https://ror.org/059tvcg64","country_code":"US","type":"company","lineage":["https://openalex.org/I1306409833"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jiuxiang Gu","raw_affiliation_strings":["Adobe Research"],"affiliations":[{"raw_affiliation_string":"Adobe Research","institution_ids":["https://openalex.org/I1306409833"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076366439","display_name":"Jason Kuen","orcid":"https://orcid.org/0000-0001-5099-8145"},"institutions":[{"id":"https://openalex.org/I1306409833","display_name":"Adobe Systems (United States)","ror":"https://ror.org/059tvcg64","country_code":"US","type":"company","lineage":["https://openalex.org/I1306409833"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jason Kuen","raw_affiliation_strings":["Adobe Research"],"affiliations":[{"raw_affiliation_string":"Adobe Research","institution_ids":["https://openalex.org/I1306409833"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038267072","display_name":"Vlad I. Morariu","orcid":"https://orcid.org/0000-0001-7937-7748"},"institutions":[{"id":"https://openalex.org/I1306409833","display_name":"Adobe Systems (United States)","ror":"https://ror.org/059tvcg64","country_code":"US","type":"company","lineage":["https://openalex.org/I1306409833"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Vlad I. Morariu","raw_affiliation_strings":["Adobe Research"],"affiliations":[{"raw_affiliation_string":"Adobe Research","institution_ids":["https://openalex.org/I1306409833"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024078415","display_name":"Handong Zhao","orcid":"https://orcid.org/0000-0003-3775-2954"},"institutions":[{"id":"https://openalex.org/I1306409833","display_name":"Adobe Systems (United States)","ror":"https://ror.org/059tvcg64","country_code":"US","type":"company","lineage":["https://openalex.org/I1306409833"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Handong Zhao","raw_affiliation_strings":["Adobe Research"],"affiliations":[{"raw_affiliation_string":"Adobe Research","institution_ids":["https://openalex.org/I1306409833"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031169949","display_name":"Rajiv Jain","orcid":"https://orcid.org/0000-0002-5322-9074"},"institutions":[{"id":"https://openalex.org/I1306409833","display_name":"Adobe Systems (United States)","ror":"https://ror.org/059tvcg64","country_code":"US","type":"company","lineage":["https://openalex.org/I1306409833"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Rajiv Jain","raw_affiliation_strings":["Adobe Research"],"affiliations":[{"raw_affiliation_string":"Adobe Research","institution_ids":["https://openalex.org/I1306409833"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110942123","display_name":"Varun Manjunatha","orcid":null},"institutions":[{"id":"https://openalex.org/I1306409833","display_name":"Adobe Systems (United States)","ror":"https://ror.org/059tvcg64","country_code":"US","type":"company","lineage":["https://openalex.org/I1306409833"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Varun Manjunatha","raw_affiliation_strings":["Adobe Research"],"affiliations":[{"raw_affiliation_string":"Adobe Research","institution_ids":["https://openalex.org/I1306409833"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101987910","display_name":"Hongfu Liu","orcid":"https://orcid.org/0000-0002-0821-8640"},"institutions":[{"id":"https://openalex.org/I6902469","display_name":"Brandeis University","ror":"https://ror.org/05abbep66","country_code":"US","type":"education","lineage":["https://openalex.org/I6902469"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hongfu Liu","raw_affiliation_strings":["Brandeis University"],"affiliations":[{"raw_affiliation_string":"Brandeis University","institution_ids":["https://openalex.org/I6902469"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5103167145"],"corresponding_institution_ids":["https://openalex.org/I6902469"],"apc_list":null,"apc_paid":null,"fwci":8.8383,"has_fulltext":false,"cited_by_count":116,"citation_normalized_percentile":{"value":0.98501634,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"5648","last_page":"5656"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9916999936103821,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8680431246757507},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.599036693572998},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.5673764944076538},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.5495565533638},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.46777668595314026},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.42962029576301575},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4292251467704773}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8680431246757507},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.599036693572998},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.5673764944076538},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.5495565533638},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.46777668595314026},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.42962029576301575},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4292251467704773},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cvpr46437.2021.00560","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cvpr46437.2021.00560","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.8600000143051147}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":59,"referenced_works":["https://openalex.org/W58456790","https://openalex.org/W1536680647","https://openalex.org/W1686810756","https://openalex.org/W1964101267","https://openalex.org/W1966382373","https://openalex.org/W2073459066","https://openalex.org/W2095818033","https://openalex.org/W2163113711","https://openalex.org/W2549139847","https://openalex.org/W2613718673","https://openalex.org/W2623808377","https://openalex.org/W2787560479","https://openalex.org/W2891117443","https://openalex.org/W2896457183","https://openalex.org/W2908510526","https://openalex.org/W2922714365","https://openalex.org/W2949370368","https://openalex.org/W2953106684","https://openalex.org/W2962739339","https://openalex.org/W2962772269","https://openalex.org/W2962835968","https://openalex.org/W2963341956","https://openalex.org/W2963403868","https://openalex.org/W2964346820","https://openalex.org/W2965373594","https://openalex.org/W2966715458","https://openalex.org/W2970231061","https://openalex.org/W2970608575","https://openalex.org/W2970641574","https://openalex.org/W2986619406","https://openalex.org/W2996480099","https://openalex.org/W2997154779","https://openalex.org/W3003273206","https://openalex.org/W3003711898","https://openalex.org/W3015468748","https://openalex.org/W3080523870","https://openalex.org/W3090669478","https://openalex.org/W3104953317","https://openalex.org/W3106301701","https://openalex.org/W3120043490","https://openalex.org/W4287657829","https://openalex.org/W4385245566","https://openalex.org/W6602403377","https://openalex.org/W6620707391","https://openalex.org/W6637373629","https://openalex.org/W6668990524","https://openalex.org/W6739901393","https://openalex.org/W6755207826","https://openalex.org/W6757817989","https://openalex.org/W6766673545","https://openalex.org/W6766904570","https://openalex.org/W6767333191","https://openalex.org/W6771753343","https://openalex.org/W6776048684","https://openalex.org/W6776545001","https://openalex.org/W6780226713","https://openalex.org/W6780502592","https://openalex.org/W6783817627","https://openalex.org/W6786666402"],"related_works":["https://openalex.org/W17155033","https://openalex.org/W3207760230","https://openalex.org/W1496222301","https://openalex.org/W1590307681","https://openalex.org/W2536018345","https://openalex.org/W4312814274","https://openalex.org/W4285370786","https://openalex.org/W2296488620","https://openalex.org/W2358353312","https://openalex.org/W2353836703"],"abstract_inverted_index":{"We":[0],"propose":[1,95],"SelfDoc,":[2],"a":[3,36,96,124],"task-agnostic":[4],"pre-training":[5,51,81,117,145],"framework":[6,22,113],"for":[7,18,101],"document":[8,50,140],"image":[9],"understanding.":[10],"Because":[11],"documents":[12,119],"are":[13,16],"multimodal":[14,86,102],"and":[15,27,38,109],"intended":[17],"sequential":[19],"reading,":[20],"our":[21,53],"exploits":[23],"the":[24,41,79,144],"positional,":[25],"textual,":[26],"visual":[28],"information":[29,87],"of":[30,46,58],"every":[31],"semantically":[32],"meaningful":[33],"component":[34],"in":[35,78,143],"document,":[37],"it":[39],"models":[40],"contextualization":[42],"between":[43],"each":[44],"block":[45],"content.":[47],"Unlike":[48],"existing":[49],"models,":[52],"model":[54,80],"is":[55],"coarse-grained":[56],"instead":[57],"treating":[59],"individual":[60],"words":[61],"as":[62],"input,":[63],"therefore":[64],"avoiding":[65],"an":[66],"overly":[67],"fine-grained":[68],"with":[69,137],"excessive":[70],"contextualization.":[71],"Beyond":[72],"that,":[73],"we":[74,94],"introduce":[75],"cross-modal":[76],"learning":[77],"phase":[82],"to":[83,148],"fully":[84],"leverage":[85],"from":[88,115],"unlabeled":[89],"documents.":[90],"For":[91],"downstream":[92,135],"usage,":[93],"novel":[97],"modality-adaptive":[98],"attention":[99],"mechanism":[100],"feature":[103,125],"fusion":[104],"by":[105,123],"adaptively":[106],"emphasizing":[107],"language":[108],"vision":[110],"signals.":[111],"Our":[112],"benefits":[114],"self-supervised":[116],"on":[118,133],"without":[120],"requiring":[121],"annotations":[122],"masking":[126],"training":[127],"strategy.":[128],"It":[129],"achieves":[130],"superior":[131],"performance":[132],"multiple":[134],"tasks":[136],"significantly":[138],"fewer":[139],"images":[141],"used":[142],"stage":[146],"compared":[147],"previous":[149],"works.":[150]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":23},{"year":2024,"cited_by_count":36},{"year":2023,"cited_by_count":37},{"year":2022,"cited_by_count":16},{"year":2021,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
