{"id":"https://openalex.org/W2061598441","doi":"https://doi.org/10.1145/2494266.2494271","title":"PDFX","display_name":"PDFX","publication_year":2013,"publication_date":"2013-09-03","ids":{"openalex":"https://openalex.org/W2061598441","doi":"https://doi.org/10.1145/2494266.2494271","mag":"2061598441"},"language":"en","primary_location":{"id":"doi:10.1145/2494266.2494271","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2494266.2494271","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2013 ACM symposium on Document engineering","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://www.research.manchester.ac.uk/portal/en/publications/pdfx-fullyautomated-pdftoxml-conversion-of-scientific-literature(8d09db22-4d6e-4781-95cc-9aa844a641f7).html","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5026937335","display_name":"Alexandru Constantin","orcid":null},"institutions":[{"id":"https://openalex.org/I28407311","display_name":"University of Manchester","ror":"https://ror.org/027m9bs27","country_code":"GB","type":"education","lineage":["https://openalex.org/I28407311"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Alexandru Constantin","raw_affiliation_strings":["The University of Manchester, Manchester, United Kingdom"],"affiliations":[{"raw_affiliation_string":"The University of Manchester, Manchester, United Kingdom","institution_ids":["https://openalex.org/I28407311"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081272754","display_name":"Steve Pettifer","orcid":"https://orcid.org/0000-0002-1809-5621"},"institutions":[{"id":"https://openalex.org/I28407311","display_name":"University of Manchester","ror":"https://ror.org/027m9bs27","country_code":"GB","type":"education","lineage":["https://openalex.org/I28407311"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Steve Pettifer","raw_affiliation_strings":["The University of Manchester, Manchester, United Kingdom"],"affiliations":[{"raw_affiliation_string":"The University of Manchester, Manchester, United Kingdom","institution_ids":["https://openalex.org/I28407311"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5054629753","display_name":"\u0410\u043d\u0434\u0440\u0435\u0439 \u0412\u043e\u0440\u043e\u043d\u043a\u043e\u0432","orcid":"https://orcid.org/0009-0009-4367-4885"},"institutions":[{"id":"https://openalex.org/I28407311","display_name":"University of Manchester","ror":"https://ror.org/027m9bs27","country_code":"GB","type":"education","lineage":["https://openalex.org/I28407311"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Andrei Voronkov","raw_affiliation_strings":["The University of Manchester, Manchester, United Kingdom"],"affiliations":[{"raw_affiliation_string":"The University of Manchester, Manchester, United Kingdom","institution_ids":["https://openalex.org/I28407311"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5026937335"],"corresponding_institution_ids":["https://openalex.org/I28407311"],"apc_list":null,"apc_paid":null,"fwci":3.8016,"has_fulltext":false,"cited_by_count":91,"citation_normalized_percentile":{"value":0.94112145,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"177","last_page":"180"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9959999918937683,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9959999918937683,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T13523","display_name":"Mathematics, Computing, and Information Processing","score":0.9950000047683716,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9943000078201294,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8027874231338501},{"id":"https://openalex.org/keywords/paragraph","display_name":"Paragraph","score":0.7601490020751953},{"id":"https://openalex.org/keywords/disk-formatting","display_name":"Disk formatting","score":0.6512441635131836},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5695580840110779},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5662776231765747},{"id":"https://openalex.org/keywords/table","display_name":"Table (database)","score":0.541856586933136},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5401197671890259},{"id":"https://openalex.org/keywords/xml","display_name":"XML","score":0.5046836137771606},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.48668962717056274},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.48325568437576294},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4496935307979584},{"id":"https://openalex.org/keywords/column","display_name":"Column (typography)","score":0.44756945967674255},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.43274569511413574},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.35060158371925354},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.2370166778564453},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.20940369367599487},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.17802372574806213}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8027874231338501},{"id":"https://openalex.org/C2777206241","wikidata":"https://www.wikidata.org/wiki/Q194431","display_name":"Paragraph","level":2,"score":0.7601490020751953},{"id":"https://openalex.org/C88006597","wikidata":"https://www.wikidata.org/wiki/Q690117","display_name":"Disk formatting","level":2,"score":0.6512441635131836},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5695580840110779},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5662776231765747},{"id":"https://openalex.org/C45235069","wikidata":"https://www.wikidata.org/wiki/Q278425","display_name":"Table (database)","level":2,"score":0.541856586933136},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5401197671890259},{"id":"https://openalex.org/C8797682","wikidata":"https://www.wikidata.org/wiki/Q2115","display_name":"XML","level":2,"score":0.5046836137771606},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.48668962717056274},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.48325568437576294},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4496935307979584},{"id":"https://openalex.org/C2780551164","wikidata":"https://www.wikidata.org/wiki/Q2306599","display_name":"Column (typography)","level":3,"score":0.44756945967674255},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.43274569511413574},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.35060158371925354},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2370166778564453},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.20940369367599487},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.17802372574806213},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1145/2494266.2494271","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2494266.2494271","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2013 ACM symposium on Document engineering","raw_type":"proceedings-article"},{"id":"pmh:oai:pure.atira.dk:openaire_cris_publications/8d09db22-4d6e-4781-95cc-9aa844a641f7","is_oa":false,"landing_page_url":"https://research.manchester.ac.uk/en/publications/8d09db22-4d6e-4781-95cc-9aa844a641f7","pdf_url":null,"source":{"id":"https://openalex.org/S4306400662","display_name":"Research Explorer (The University of Manchester)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I28407311","host_organization_name":"University of Manchester","host_organization_lineage":["https://openalex.org/I28407311"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Constantin, A, Pettifer, S & Voronkov, A 2013, {PDFX:} fully-automated PDF-to-XML conversion of scientific literature. in {ACM} Symposium on Document Engineering 2013, DocEng '13, Florence, Italy, September 10-13, 2013. Florence, Italy, pp. 177-180, 2013 ACM Symposium on Document Engineering, DocEng 2013, Florence, 1/07/13. https://doi.org/10.1145/2494266.2494271","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:pure.atira.dk:publications/8d09db22-4d6e-4781-95cc-9aa844a641f7","is_oa":true,"landing_page_url":"http://doi.acm.org/10.1145/2494266.2494271","pdf_url":"https://www.research.manchester.ac.uk/portal/en/publications/pdfx-fullyautomated-pdftoxml-conversion-of-scientific-literature(8d09db22-4d6e-4781-95cc-9aa844a641f7).html","source":{"id":"https://openalex.org/S4306400662","display_name":"Research Explorer (The University of Manchester)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I28407311","host_organization_name":"University of Manchester","host_organization_lineage":["https://openalex.org/I28407311"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":""},{"id":"pmh:oai:pure.atira.dk:Publications/8d09db22-4d6e-4781-95cc-9aa844a641f7","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4306400662","display_name":"Research Explorer (The University of Manchester)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I28407311","host_organization_name":"University of Manchester","host_organization_lineage":["https://openalex.org/I28407311"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":""}],"best_oa_location":{"id":"pmh:oai:pure.atira.dk:publications/8d09db22-4d6e-4781-95cc-9aa844a641f7","is_oa":true,"landing_page_url":"http://doi.acm.org/10.1145/2494266.2494271","pdf_url":"https://www.research.manchester.ac.uk/portal/en/publications/pdfx-fullyautomated-pdftoxml-conversion-of-scientific-literature(8d09db22-4d6e-4781-95cc-9aa844a641f7).html","source":{"id":"https://openalex.org/S4306400662","display_name":"Research Explorer (The University of Manchester)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I28407311","host_organization_name":"University of Manchester","host_organization_lineage":["https://openalex.org/I28407311"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":""},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.8100000023841858,"display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G1522051408","display_name":null,"funder_award_id":"EP/K032674/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"}],"funders":[{"id":"https://openalex.org/F4320334627","display_name":"Engineering and Physical Sciences Research Council","ror":"https://ror.org/0439y7842"}],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2061598441.pdf"},"referenced_works_count":11,"referenced_works":["https://openalex.org/W1530730108","https://openalex.org/W1559499673","https://openalex.org/W1768944425","https://openalex.org/W2064354603","https://openalex.org/W2097056536","https://openalex.org/W2147107544","https://openalex.org/W2163187547","https://openalex.org/W2166633748","https://openalex.org/W2288271880","https://openalex.org/W2612388341","https://openalex.org/W6638165513"],"related_works":["https://openalex.org/W4244466418","https://openalex.org/W2377059580","https://openalex.org/W2104062382","https://openalex.org/W4200355488","https://openalex.org/W2162878363","https://openalex.org/W2389021890","https://openalex.org/W127000293","https://openalex.org/W3215892509","https://openalex.org/W4242806677","https://openalex.org/W4231366852"],"abstract_inverted_index":{"PDFX":[0,129],"is":[1,25,69,148],"a":[2,91,118,131],"rule-based":[3],"system":[4,95],"designed":[5],"to":[6,48],"reconstruct":[7],"the":[8,31,53,66,71,98],"logical":[9,34],"structure":[10,35],"of":[11,18,38,65,85,122],"scholarly":[12],"articles":[13,123],"in":[14,36,52,113],"PDF":[15],"form,":[16],"regardless":[17],"their":[19],"formatting":[20],"style.":[21],"The":[22,62,94,146],"system's":[23],"output":[24],"an":[26],"XML":[27],"document":[28],"that":[29,70],"describes":[30],"input":[32],"article's":[33],"terms":[37],"title,":[39],"sections,":[40],"tables,":[41],"references,":[42],"etc.":[43],"and":[44,59,82,139],"also":[45],"links":[46],"it":[47],"geometrical":[49],"typesetting":[50],"markers":[51],"original":[54],"PDF,":[55],"such":[56],"as":[57],"paragraph":[58],"column":[60],"breaks.":[61],"key":[63],"aspect":[64],"presented":[67],"approach":[68],"rule":[72],"set":[73],"used":[74],"relies":[75],"on":[76,90],"relative":[77],"parameters":[78],"derived":[79],"from":[80,124],"font":[81],"layout":[83],"specifics":[84],"each":[86],"article,":[87],"rather":[88],"than":[89],"template-matching":[92],"paradigm.":[93],"thus":[96],"obviates":[97],"need":[99],"for":[100,135,141,151],"domain-":[101],"or":[102,105],"layout-specific":[103],"tuning":[104],"prior":[106],"training,":[107],"exploiting":[108],"only":[109],"typographical":[110],"conventions":[111],"inherent":[112],"scientific":[114],"literature.":[115],"Evaluated":[116],"against":[117],"significantly":[119],"varied":[120],"corpus":[121],"nearly":[125],"2000":[126],"different":[127],"journals,":[128],"gives":[130],"77.45":[132],"F1":[133],"measure":[134],"top-level":[136],"heading":[137],"identification":[138],"74.03":[140],"extracting":[142],"individual":[143],"bibliographic":[144],"items.":[145],"service":[147],"freely":[149],"available":[150],"use":[152],"at":[153],"http://pdfx.cs.man.ac.uk/.":[154]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":7},{"year":2021,"cited_by_count":8},{"year":2020,"cited_by_count":10},{"year":2019,"cited_by_count":11},{"year":2018,"cited_by_count":11},{"year":2017,"cited_by_count":7},{"year":2016,"cited_by_count":12},{"year":2015,"cited_by_count":6},{"year":2014,"cited_by_count":7},{"year":2013,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2016-06-24T00:00:00"}
