{"id":"https://openalex.org/W4226196222","doi":"https://doi.org/10.1162/tacl_a_00466","title":"VILA: Improving Structured Content Extraction from Scientific PDFs Using Visual Layout Groups","display_name":"VILA: Improving Structured Content Extraction from Scientific PDFs Using Visual Layout Groups","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W4226196222","doi":"https://doi.org/10.1162/tacl_a_00466"},"language":"en","primary_location":{"id":"doi:10.1162/tacl_a_00466","is_oa":true,"landing_page_url":"https://doi.org/10.1162/tacl_a_00466","pdf_url":"https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl_a_00466/2006993/tacl_a_00466.pdf","source":{"id":"https://openalex.org/S2729999759","display_name":"Transactions of the Association for Computational Linguistics","issn_l":"2307-387X","issn":["2307-387X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Transactions of the Association for Computational Linguistics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl_a_00466/2006993/tacl_a_00466.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5038732616","display_name":"Zejiang Shen","orcid":null},"institutions":[{"id":"https://openalex.org/I4210140341","display_name":"Allen Institute","ror":"https://ror.org/03cpe7c52","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I4210140341"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Zejiang Shen","raw_affiliation_strings":["Allen Institute for AI, USA. shannons@allenai.org"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Allen Institute for AI, USA. shannons@allenai.org","institution_ids":["https://openalex.org/I4210140341"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066588555","display_name":"Kyle Lo","orcid":"https://orcid.org/0000-0002-1804-2853"},"institutions":[{"id":"https://openalex.org/I4210140341","display_name":"Allen Institute","ror":"https://ror.org/03cpe7c52","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I4210140341"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Kyle Lo","raw_affiliation_strings":["Allen Institute for AI, USA. kylel@allenai.org"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Allen Institute for AI, USA. kylel@allenai.org","institution_ids":["https://openalex.org/I4210140341"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001778694","display_name":"Lucy Lu Wang","orcid":"https://orcid.org/0000-0001-8752-6635"},"institutions":[{"id":"https://openalex.org/I4210140341","display_name":"Allen Institute","ror":"https://ror.org/03cpe7c52","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I4210140341"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Lucy Lu Wang","raw_affiliation_strings":["Allen Institute for AI, USA. lucyw@allenai.org"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Allen Institute for AI, USA. lucyw@allenai.org","institution_ids":["https://openalex.org/I4210140341"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037115259","display_name":"Bailey Kuehl","orcid":null},"institutions":[{"id":"https://openalex.org/I4210140341","display_name":"Allen Institute","ror":"https://ror.org/03cpe7c52","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I4210140341"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Bailey Kuehl","raw_affiliation_strings":["Allen Institute for AI, USA. baileyk@allenai.org"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Allen Institute for AI, USA. baileyk@allenai.org","institution_ids":["https://openalex.org/I4210140341"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085011940","display_name":"Daniel S. Weld","orcid":"https://orcid.org/0000-0002-3255-0109"},"institutions":[{"id":"https://openalex.org/I4210140341","display_name":"Allen Institute","ror":"https://ror.org/03cpe7c52","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I4210140341"]},{"id":"https://openalex.org/I201448701","display_name":"University of Washington","ror":"https://ror.org/00cvxb145","country_code":"US","type":"education","lineage":["https://openalex.org/I201448701"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Daniel S. Weld","raw_affiliation_strings":["Allen Institute for AI, USA","University of Washington, USA. danw@allenai.org"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Allen Institute for AI, USA","institution_ids":["https://openalex.org/I4210140341"]},{"raw_affiliation_string":"University of Washington, USA. danw@allenai.org","institution_ids":["https://openalex.org/I201448701"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5043450042","display_name":"Doug Downey","orcid":"https://orcid.org/0000-0002-4737-8444"},"institutions":[{"id":"https://openalex.org/I111979921","display_name":"Northwestern University","ror":"https://ror.org/000e0be47","country_code":"US","type":"education","lineage":["https://openalex.org/I111979921"]},{"id":"https://openalex.org/I4210140341","display_name":"Allen Institute","ror":"https://ror.org/03cpe7c52","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I4210140341"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Doug Downey","raw_affiliation_strings":["Allen Institute for AI, USA","Northwestern University, USA. dougd@allenai.org"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Allen Institute for AI, USA","institution_ids":["https://openalex.org/I4210140341"]},{"raw_affiliation_string":"Northwestern University, USA. dougd@allenai.org","institution_ids":["https://openalex.org/I111979921"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5001778694","https://openalex.org/A5037115259","https://openalex.org/A5038732616","https://openalex.org/A5043450042","https://openalex.org/A5066588555","https://openalex.org/A5085011940"],"corresponding_institution_ids":["https://openalex.org/I111979921","https://openalex.org/I201448701","https://openalex.org/I4210140341"],"apc_list":null,"apc_paid":null,"fwci":3.8884,"has_fulltext":false,"cited_by_count":30,"citation_normalized_percentile":{"value":0.94165776,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":"10","issue":null,"first_page":"376","last_page":"392"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9897000193595886,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8794931769371033},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.8381085395812988},{"id":"https://openalex.org/keywords/macro","display_name":"Macro","score":0.7147767543792725},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6505643725395203},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.5213421583175659},{"id":"https://openalex.org/keywords/suite","display_name":"Suite","score":0.513095498085022},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.4792878031730652},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.466264545917511},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4153635799884796},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.41424983739852905},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.36745160818099976},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.33163511753082275},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.1354479193687439},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.08507838845252991}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8794931769371033},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.8381085395812988},{"id":"https://openalex.org/C166955791","wikidata":"https://www.wikidata.org/wiki/Q629579","display_name":"Macro","level":2,"score":0.7147767543792725},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6505643725395203},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.5213421583175659},{"id":"https://openalex.org/C79581498","wikidata":"https://www.wikidata.org/wiki/Q1367530","display_name":"Suite","level":2,"score":0.513095498085022},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.4792878031730652},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.466264545917511},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4153635799884796},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.41424983739852905},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.36745160818099976},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.33163511753082275},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.1354479193687439},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.08507838845252991},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1162/tacl_a_00466","is_oa":true,"landing_page_url":"https://doi.org/10.1162/tacl_a_00466","pdf_url":"https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl_a_00466/2006993/tacl_a_00466.pdf","source":{"id":"https://openalex.org/S2729999759","display_name":"Transactions of the Association for Computational Linguistics","issn_l":"2307-387X","issn":["2307-387X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Transactions of the Association for Computational Linguistics","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:8391c75e305f49999ff12c1d2cd19316","is_oa":false,"landing_page_url":"https://doaj.org/article/8391c75e305f49999ff12c1d2cd19316","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Transactions of the Association for Computational Linguistics, Vol 10, Pp 376-392 (2022)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1162/tacl_a_00466","is_oa":true,"landing_page_url":"https://doi.org/10.1162/tacl_a_00466","pdf_url":"https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl_a_00466/2006993/tacl_a_00466.pdf","source":{"id":"https://openalex.org/S2729999759","display_name":"Transactions of the Association for Computational Linguistics","issn_l":"2307-387X","issn":["2307-387X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Transactions of the Association for Computational Linguistics","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.7900000214576721,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4226196222.pdf","grobid_xml":"https://content.openalex.org/works/W4226196222.grobid-xml"},"referenced_works_count":49,"referenced_works":["https://openalex.org/W639708223","https://openalex.org/W791527587","https://openalex.org/W1997754546","https://openalex.org/W2064675550","https://openalex.org/W2483327705","https://openalex.org/W2612690371","https://openalex.org/W2795424778","https://openalex.org/W2801930304","https://openalex.org/W2911964244","https://openalex.org/W2962785754","https://openalex.org/W2965373594","https://openalex.org/W2970771982","https://openalex.org/W2978017171","https://openalex.org/W2979826702","https://openalex.org/W2997154779","https://openalex.org/W2999905431","https://openalex.org/W3003711898","https://openalex.org/W3015453090","https://openalex.org/W3019932981","https://openalex.org/W3020786614","https://openalex.org/W3093838622","https://openalex.org/W3103188966","https://openalex.org/W3104018737","https://openalex.org/W3104049173","https://openalex.org/W3112776819","https://openalex.org/W3113463745","https://openalex.org/W3173558867","https://openalex.org/W3197055326","https://openalex.org/W3202466114","https://openalex.org/W3214897310","https://openalex.org/W4287194852","https://openalex.org/W4289751790","https://openalex.org/W4301409532","https://openalex.org/W6620707391","https://openalex.org/W6631190155","https://openalex.org/W6682082992","https://openalex.org/W6735463952","https://openalex.org/W6739901393","https://openalex.org/W6745245109","https://openalex.org/W6755207826","https://openalex.org/W6757817989","https://openalex.org/W6766673545","https://openalex.org/W6766978945","https://openalex.org/W6768851824","https://openalex.org/W6776225533","https://openalex.org/W6778206941","https://openalex.org/W6790963376","https://openalex.org/W6794926475","https://openalex.org/W6797048850"],"related_works":["https://openalex.org/W4231704780","https://openalex.org/W2030816003","https://openalex.org/W2083794993","https://openalex.org/W352609212","https://openalex.org/W4239992647","https://openalex.org/W2150013480","https://openalex.org/W1511772879","https://openalex.org/W1554458299","https://openalex.org/W2076325756","https://openalex.org/W2001919569"],"abstract_inverted_index":{"Abstract":[0],"Accurately":[1],"extracting":[2],"structured":[3],"content":[4],"from":[5,171],"PDFs":[6],"is":[7],"a":[8,84,148,162],"critical":[9],"first":[10],"step":[11],"for":[12,28],"NLP":[13],"over":[14],"scientific":[15,173],"papers.":[16],"Recent":[17],"work":[18],"has":[19],"improved":[20],"extraction":[21],"accuracy":[22],"by":[23,140],"incorporating":[24],"elementary":[25],"layout":[26,75],"information,":[27],"example,":[29],"each":[30],"token\u2019s":[31],"2D":[32],"position":[33],"on":[34,147],"the":[35,93],"page,":[36],"into":[37,78],"language":[38],"model":[39,47,79],"pretraining.":[40],"We":[41],"introduce":[42],"new":[43,163],"methods":[44,124],"that":[45,52,69,98,154],"explicitly":[46],"VIsual":[48],"LAyout":[49],"(VILA)":[50],"groups,":[51],"is,":[53],"text":[54,57],"lines":[55],"or":[56],"blocks,":[58],"to":[59,83,107,142],"further":[60],"improve":[61],"performance.":[62],"In":[63,92],"our":[64,123],"I-VILA":[65],"approach,":[66,95],"we":[67,96,134],"show":[68,97,135],"simply":[70],"inserting":[71],"special":[72],"tokens":[73],"denoting":[74],"group":[76],"boundaries":[77],"inputs":[80],"can":[81,103,136],"lead":[82],"1.9%":[85],"Macro":[86,116],"F1":[87,117],"improvement":[88],"in":[89,105],"token":[90],"classification.":[91],"H-VILA":[94],"hierarchical":[99],"encoding":[100],"of":[101,165],"layout-groups":[102],"result":[104],"up":[106,141],"47%":[108],"inference":[109],"time":[110],"reduction":[111],"with":[112],"less":[113],"than":[114],"0.8%":[115],"loss.":[118],"Unlike":[119],"prior":[120],"layout-aware":[121],"approaches,":[122],"do":[125],"not":[126],"require":[127],"expensive":[128],"additional":[129],"pretraining,":[130],"only":[131],"fine-tuning,":[132],"which":[133],"reduce":[137],"training":[138],"cost":[139],"95%.":[143],"Experiments":[144],"are":[145,182],"conducted":[146],"newly":[149],"curated":[150],"evaluation":[151],"suite,":[152],"S2-VLUE,":[153],"unifies":[155],"existing":[156],"automatically":[157],"labeled":[158],"datasets":[159],"and":[160,179],"includes":[161],"dataset":[164],"manual":[166],"annotations":[167],"covering":[168],"diverse":[169],"papers":[170],"19":[172],"disciplines.":[174],"Pre-trained":[175],"weights,":[176],"benchmark":[177],"datasets,":[178],"source":[180],"code":[181],"available":[183],"at":[184],"https://github.com/allenai/VILA.":[185]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":8},{"year":2023,"cited_by_count":12},{"year":2022,"cited_by_count":3}],"updated_date":"2026-05-06T08:25:59.206177","created_date":"2025-10-10T00:00:00"}
