{"id":"https://openalex.org/W4384891056","doi":"https://doi.org/10.1145/3539618.3591975","title":"DocGraphLM: Documental Graph Language Model for Information Extraction","display_name":"DocGraphLM: Documental Graph Language Model for Information Extraction","publication_year":2023,"publication_date":"2023-07-18","ids":{"openalex":"https://openalex.org/W4384891056","doi":"https://doi.org/10.1145/3539618.3591975"},"language":"en","primary_location":{"id":"doi:10.1145/3539618.3591975","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3539618.3591975","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 46th International ACM SIGIR Conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5053995579","display_name":"Dongsheng Wang","orcid":"https://orcid.org/0000-0002-5806-3894"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Dongsheng Wang","raw_affiliation_strings":["JPMorgan AI Research, London, United Kingdom"],"affiliations":[{"raw_affiliation_string":"JPMorgan AI Research, London, United Kingdom","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055644473","display_name":"Zhiqiang Ma","orcid":null},"institutions":[{"id":"https://openalex.org/I1305429384","display_name":"JPMorgan Chase & Co (United States)","ror":"https://ror.org/01x3kkr08","country_code":"US","type":"company","lineage":["https://openalex.org/I1305429384"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhiqiang Ma","raw_affiliation_strings":["JPMorgan AI Research, New York, NY, USA"],"affiliations":[{"raw_affiliation_string":"JPMorgan AI Research, New York, NY, USA","institution_ids":["https://openalex.org/I1305429384"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062396463","display_name":"Armineh Nourbakhsh","orcid":"https://orcid.org/0009-0004-1908-8679"},"institutions":[{"id":"https://openalex.org/I1305429384","display_name":"JPMorgan Chase & Co (United States)","ror":"https://ror.org/01x3kkr08","country_code":"US","type":"company","lineage":["https://openalex.org/I1305429384"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Armineh Nourbakhsh","raw_affiliation_strings":["JPMorgan AI Research, New York, NY, USA"],"affiliations":[{"raw_affiliation_string":"JPMorgan AI Research, New York, NY, USA","institution_ids":["https://openalex.org/I1305429384"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103123155","display_name":"Kang Gu","orcid":"https://orcid.org/0000-0002-8638-3510"},"institutions":[{"id":"https://openalex.org/I107672454","display_name":"Dartmouth College","ror":"https://ror.org/049s0rh22","country_code":"US","type":"education","lineage":["https://openalex.org/I107672454"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kang Gu","raw_affiliation_strings":["Dartmouth College, Hanover, NH, USA"],"affiliations":[{"raw_affiliation_string":"Dartmouth College, Hanover, NH, USA","institution_ids":["https://openalex.org/I107672454"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5087647155","display_name":"Sameena Shah","orcid":"https://orcid.org/0009-0000-5960-5811"},"institutions":[{"id":"https://openalex.org/I1305429384","display_name":"JPMorgan Chase & Co (United States)","ror":"https://ror.org/01x3kkr08","country_code":"US","type":"company","lineage":["https://openalex.org/I1305429384"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sameena Shah","raw_affiliation_strings":["JPMorgan AI Research, New York, NY, USA"],"affiliations":[{"raw_affiliation_string":"JPMorgan AI Research, New York, NY, USA","institution_ids":["https://openalex.org/I1305429384"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5053995579"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.0979,"has_fulltext":false,"cited_by_count":12,"citation_normalized_percentile":{"value":0.89550338,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1944","last_page":"1948"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9922000169754028,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7820090055465698},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5750057101249695},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.5615884065628052},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4432603120803833},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.4351416528224945},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.4283774793148041},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.37084266543388367},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.35977810621261597},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3524906039237976},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.34368887543678284}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7820090055465698},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5750057101249695},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.5615884065628052},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4432603120803833},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.4351416528224945},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.4283774793148041},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.37084266543388367},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.35977810621261597},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3524906039237976},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.34368887543678284},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3539618.3591975","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3539618.3591975","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 46th International ACM SIGIR Conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/11","display_name":"Sustainable cities and communities","score":0.6000000238418579}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":12,"referenced_works":["https://openalex.org/W2962946486","https://openalex.org/W2986619406","https://openalex.org/W2997154779","https://openalex.org/W3034864438","https://openalex.org/W3104953317","https://openalex.org/W3120043490","https://openalex.org/W3163650427","https://openalex.org/W3176664887","https://openalex.org/W3182680257","https://openalex.org/W3205981739","https://openalex.org/W4304013646","https://openalex.org/W6600013530"],"related_works":["https://openalex.org/W4390516098","https://openalex.org/W2181948922","https://openalex.org/W2384362569","https://openalex.org/W2142795561","https://openalex.org/W4205302943","https://openalex.org/W2561132942","https://openalex.org/W3155418658","https://openalex.org/W4243199227","https://openalex.org/W2379948177","https://openalex.org/W2334580170"],"abstract_inverted_index":{"Advances":[0],"in":[1,127],"Visually":[2],"Rich":[3],"Document":[4],"Understanding":[5],"(VrDU)":[6],"have":[7,23],"enabled":[8],"information":[9],"extraction":[10],"and":[11,29,63,78,92,108],"question":[12],"answering":[13],"over":[14],"documents":[15],"with":[16,47,111],"complex":[17],"layouts.":[18],"Two":[19],"tropes":[20],"of":[21,114],"architectures":[22],"emerged-transformer-based":[24],"models":[25,46],"inspired":[26],"by":[27],"LLMs,":[28],"Graph":[30],"Neural":[31],"Networks.":[32],"In":[33],"this":[34],"paper,":[35],"we":[36,53,118],"introduce":[37],"DocGraphLM,":[38],"a":[39,56,65,83],"novel":[40,66],"framework":[41],"that":[42,88,120],"combines":[43],"pre-trained":[44],"language":[45],"graph":[48,115,123],"semantics.":[49],"To":[50],"achieve":[51],"this,":[52],"propose":[54],"1)":[55],"joint":[57,85],"encoder":[58],"architecture":[59],"to":[60,70],"represent":[61],"documents,":[62],"2)":[64],"link":[67,138],"prediction":[68],"approach":[69],"reconstruct":[71],"document":[72],"graphs.":[73],"DocGraphLM":[74],"predicts":[75],"both":[76],"directions":[77],"distances":[79],"between":[80],"nodes":[81],"using":[82],"convergent":[84],"loss":[86],"function":[87],"prioritizes":[89],"neighborhood":[90],"restoration":[91],"downweighs":[93],"distant":[94],"node":[95],"detection.":[96],"Our":[97],"experiments":[98],"on":[99,106],"three":[100],"SotA":[101],"datasets":[102],"show":[103],"consistent":[104],"improvement":[105],"IE":[107],"QA":[109],"tasks":[110],"the":[112,122,128],"adoption":[113],"features.":[116],"Moreover,":[117],"report":[119],"adopting":[121],"features":[124],"accelerates":[125],"convergence":[126],"learning":[129],"process":[130],"druing":[131],"training,":[132],"despite":[133],"being":[134],"solely":[135],"constructed":[136],"through":[137],"prediction.":[139]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":8},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
