{"id":"https://openalex.org/W1530730108","doi":"https://doi.org/10.1007/11669487_12","title":"A System for Converting PDF Documents into Structured XML Format","display_name":"A System for Converting PDF Documents into Structured XML Format","publication_year":2006,"publication_date":"2006-01-01","ids":{"openalex":"https://openalex.org/W1530730108","doi":"https://doi.org/10.1007/11669487_12","mag":"1530730108"},"language":"en","primary_location":{"id":"doi:10.1007/11669487_12","is_oa":true,"landing_page_url":"https://doi.org/10.1007/11669487_12","pdf_url":"https://link.springer.com/content/pdf/10.1007/11669487_12.pdf","source":{"id":"https://openalex.org/S106296714","display_name":"Lecture notes in computer science","issn_l":"0302-9743","issn":["0302-9743","1611-3349"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"book series"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Lecture Notes in Computer Science","raw_type":"book-chapter"},"type":"book-chapter","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://link.springer.com/content/pdf/10.1007/11669487_12.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5085566571","display_name":"Herv\u00e9 D\u00e9jean","orcid":"https://orcid.org/0000-0002-9837-5358"},"institutions":[{"id":"https://openalex.org/I33976269","display_name":"Xerox (France)","ror":"https://ror.org/033q0mv79","country_code":"FR","type":"company","lineage":["https://openalex.org/I33976269","https://openalex.org/I4210132870"]}],"countries":["FR"],"is_corresponding":true,"raw_author_name":"Herv\u00e9 D\u00e9jean","raw_affiliation_strings":["Xerox Research Centre Europe, 6, chemin de Maupertuis, F-38240, Meylan"],"affiliations":[{"raw_affiliation_string":"Xerox Research Centre Europe, 6, chemin de Maupertuis, F-38240, Meylan","institution_ids":["https://openalex.org/I33976269"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5061308895","display_name":"Jean-Luc Meunier","orcid":null},"institutions":[{"id":"https://openalex.org/I33976269","display_name":"Xerox (France)","ror":"https://ror.org/033q0mv79","country_code":"FR","type":"company","lineage":["https://openalex.org/I33976269","https://openalex.org/I4210132870"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Jean-Luc Meunier","raw_affiliation_strings":["Xerox Research Centre Europe, 6, chemin de Maupertuis, F-38240, Meylan"],"affiliations":[{"raw_affiliation_string":"Xerox Research Centre Europe, 6, chemin de Maupertuis, F-38240, Meylan","institution_ids":["https://openalex.org/I33976269"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5085566571"],"corresponding_institution_ids":["https://openalex.org/I33976269"],"apc_list":{"value":5000,"currency":"EUR","value_usd":5392},"apc_paid":null,"fwci":4.5854,"has_fulltext":false,"cited_by_count":59,"citation_normalized_percentile":{"value":0.94682779,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"129","last_page":"140"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13523","display_name":"Mathematics, Computing, and Information Processing","score":0.995199978351593,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13523","display_name":"Mathematics, Computing, and Information Processing","score":0.995199978351593,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9879999756813049,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12377","display_name":"Digital Humanities and Scholarship","score":0.9793999791145325,"subfield":{"id":"https://openalex.org/subfields/1208","display_name":"Literature and Literary Theory"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8913120031356812},{"id":"https://openalex.org/keywords/xml","display_name":"XML","score":0.8031505346298218},{"id":"https://openalex.org/keywords/document-structure-description","display_name":"Document Structure Description","score":0.6935925483703613},{"id":"https://openalex.org/keywords/bitmap","display_name":"Bitmap","score":0.6726091504096985},{"id":"https://openalex.org/keywords/streaming-xml","display_name":"Streaming XML","score":0.6686787009239197},{"id":"https://openalex.org/keywords/simple-api-for-xml","display_name":"Simple API for XML","score":0.6198893189430237},{"id":"https://openalex.org/keywords/xml-framework","display_name":"XML framework","score":0.5604744553565979},{"id":"https://openalex.org/keywords/efficient-xml-interchange","display_name":"Efficient XML Interchange","score":0.5529270768165588},{"id":"https://openalex.org/keywords/xml-validation","display_name":"XML validation","score":0.5508924126625061},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5335277915000916},{"id":"https://openalex.org/keywords/graphical-user-interface","display_name":"Graphical user interface","score":0.4967861771583557},{"id":"https://openalex.org/keywords/xml-database","display_name":"XML database","score":0.4508046507835388},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.44394010305404663},{"id":"https://openalex.org/keywords/document-type-definition","display_name":"Document type definition","score":0.4179422855377197},{"id":"https://openalex.org/keywords/xml-schema-editor","display_name":"XML Schema Editor","score":0.417126327753067},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.40111011266708374},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.32583558559417725},{"id":"https://openalex.org/keywords/xml-signature","display_name":"XML Signature","score":0.3012293577194214},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.19743436574935913}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8913120031356812},{"id":"https://openalex.org/C8797682","wikidata":"https://www.wikidata.org/wiki/Q2115","display_name":"XML","level":2,"score":0.8031505346298218},{"id":"https://openalex.org/C68699486","wikidata":"https://www.wikidata.org/wiki/Q265904","display_name":"Document Structure Description","level":3,"score":0.6935925483703613},{"id":"https://openalex.org/C3115412","wikidata":"https://www.wikidata.org/wiki/Q1194708","display_name":"Bitmap","level":2,"score":0.6726091504096985},{"id":"https://openalex.org/C44883583","wikidata":"https://www.wikidata.org/wiki/Q7622687","display_name":"Streaming XML","level":3,"score":0.6686787009239197},{"id":"https://openalex.org/C8595896","wikidata":"https://www.wikidata.org/wiki/Q577094","display_name":"Simple API for XML","level":5,"score":0.6198893189430237},{"id":"https://openalex.org/C21761406","wikidata":"https://www.wikidata.org/wiki/Q8042330","display_name":"XML framework","level":3,"score":0.5604744553565979},{"id":"https://openalex.org/C11508877","wikidata":"https://www.wikidata.org/wiki/Q1124477","display_name":"Efficient XML Interchange","level":3,"score":0.5529270768165588},{"id":"https://openalex.org/C55348073","wikidata":"https://www.wikidata.org/wiki/Q595926","display_name":"XML validation","level":3,"score":0.5508924126625061},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5335277915000916},{"id":"https://openalex.org/C37789001","wikidata":"https://www.wikidata.org/wiki/Q782543","display_name":"Graphical user interface","level":2,"score":0.4967861771583557},{"id":"https://openalex.org/C183068750","wikidata":"https://www.wikidata.org/wiki/Q357393","display_name":"XML database","level":3,"score":0.4508046507835388},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.44394010305404663},{"id":"https://openalex.org/C84314905","wikidata":"https://www.wikidata.org/wiki/Q212327","display_name":"Document type definition","level":4,"score":0.4179422855377197},{"id":"https://openalex.org/C34716815","wikidata":"https://www.wikidata.org/wiki/Q8042322","display_name":"XML Schema Editor","level":3,"score":0.417126327753067},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.40111011266708374},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.32583558559417725},{"id":"https://openalex.org/C34330436","wikidata":"https://www.wikidata.org/wiki/Q979532","display_name":"XML Signature","level":4,"score":0.3012293577194214},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.19743436574935913}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1007/11669487_12","is_oa":true,"landing_page_url":"https://doi.org/10.1007/11669487_12","pdf_url":"https://link.springer.com/content/pdf/10.1007/11669487_12.pdf","source":{"id":"https://openalex.org/S106296714","display_name":"Lecture notes in computer science","issn_l":"0302-9743","issn":["0302-9743","1611-3349"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"book series"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Lecture Notes in Computer Science","raw_type":"book-chapter"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.71.4946","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.71.4946","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.xrce.xerox.com/Publications/Attachments/2005-053/das06-026.pdf","raw_type":"text"}],"best_oa_location":{"id":"doi:10.1007/11669487_12","is_oa":true,"landing_page_url":"https://doi.org/10.1007/11669487_12","pdf_url":"https://link.springer.com/content/pdf/10.1007/11669487_12.pdf","source":{"id":"https://openalex.org/S106296714","display_name":"Lecture notes in computer science","issn_l":"0302-9743","issn":["0302-9743","1611-3349"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"book series"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Lecture Notes in Computer Science","raw_type":"book-chapter"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320333067","display_name":"Sixth Framework Programme","ror":null}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W1530730108.pdf","grobid_xml":"https://content.openalex.org/works/W1530730108.grobid-xml"},"referenced_works_count":9,"referenced_works":["https://openalex.org/W8079469","https://openalex.org/W36566811","https://openalex.org/W102704617","https://openalex.org/W611401390","https://openalex.org/W1607161872","https://openalex.org/W2035439590","https://openalex.org/W2035706500","https://openalex.org/W2100735443","https://openalex.org/W2105413446"],"related_works":["https://openalex.org/W1970328709","https://openalex.org/W2114746386","https://openalex.org/W2090406160","https://openalex.org/W2464214490","https://openalex.org/W1450348355","https://openalex.org/W2157637842","https://openalex.org/W125863246","https://openalex.org/W134554114","https://openalex.org/W2147404202","https://openalex.org/W112893379"],"abstract_inverted_index":null,"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":5},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":3},{"year":2016,"cited_by_count":1},{"year":2015,"cited_by_count":3},{"year":2014,"cited_by_count":6},{"year":2013,"cited_by_count":4},{"year":2012,"cited_by_count":4}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
