{"id":"https://openalex.org/W7138172456","doi":"https://doi.org/10.1609/aaai.v40i6.42464","title":"Semantic Document Derendering: SVG Reconstruction via Vision-Language Modeling","display_name":"Semantic Document Derendering: SVG Reconstruction via Vision-Language Modeling","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7138172456","doi":"https://doi.org/10.1609/aaai.v40i6.42464"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i6.42464","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i6.42464","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1609/aaai.v40i6.42464","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5094246343","display_name":"Adam Hazimeh","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Adam Hazimeh","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129727326","display_name":"Ke Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ke Wang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129691947","display_name":"Mark Collier","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mark Collier","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072231116","display_name":"Gilles Baechler","orcid":"https://orcid.org/0000-0002-8453-6093"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gilles Baechler","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009956004","display_name":"Efi Kokiopoulou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Efi Kokiopoulou","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5000947076","display_name":"Pascal Frossard","orcid":"https://orcid.org/0000-0002-4010-714X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pascal Frossard","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5094246343"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.46555475,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"6","first_page":"4636","last_page":"4644"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.26440000534057617,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.26440000534057617,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.17990000545978546,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.10859999805688858,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scalable-vector-graphics","display_name":"Scalable Vector Graphics","score":0.9501000046730042},{"id":"https://openalex.org/keywords/raster-graphics","display_name":"Raster graphics","score":0.8481000065803528},{"id":"https://openalex.org/keywords/vectorization","display_name":"Vectorization (mathematics)","score":0.6552000045776367},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.521399974822998},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.49720001220703125},{"id":"https://openalex.org/keywords/vector-graphics","display_name":"Vector graphics","score":0.46549999713897705},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.451200008392334},{"id":"https://openalex.org/keywords/raster-data","display_name":"Raster data","score":0.4129999876022339}],"concepts":[{"id":"https://openalex.org/C202629362","wikidata":"https://www.wikidata.org/wiki/Q2078","display_name":"Scalable Vector Graphics","level":2,"score":0.9501000046730042},{"id":"https://openalex.org/C181844469","wikidata":"https://www.wikidata.org/wiki/Q182270","display_name":"Raster graphics","level":2,"score":0.8481000065803528},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7886000275611877},{"id":"https://openalex.org/C41681595","wikidata":"https://www.wikidata.org/wiki/Q7917855","display_name":"Vectorization (mathematics)","level":2,"score":0.6552000045776367},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.521399974822998},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.49720001220703125},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.46950000524520874},{"id":"https://openalex.org/C59662460","wikidata":"https://www.wikidata.org/wiki/Q170130","display_name":"Vector graphics","level":3,"score":0.46549999713897705},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.454800009727478},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.451200008392334},{"id":"https://openalex.org/C2692088","wikidata":"https://www.wikidata.org/wiki/Q182270","display_name":"Raster data","level":3,"score":0.4129999876022339},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.40380001068115234},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.40230000019073486},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.38989999890327454},{"id":"https://openalex.org/C145406643","wikidata":"https://www.wikidata.org/wiki/Q2641959","display_name":"Raster scan","level":2,"score":0.3538999855518341},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.3375000059604645},{"id":"https://openalex.org/C2776674983","wikidata":"https://www.wikidata.org/wiki/Q545981","display_name":"Image editing","level":3,"score":0.3140999972820282},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.29739999771118164},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.28380000591278076},{"id":"https://openalex.org/C42781572","wikidata":"https://www.wikidata.org/wiki/Q1250322","display_name":"Digital image","level":4,"score":0.2800999879837036},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.26409998536109924},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.25949999690055847},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2563000023365021},{"id":"https://openalex.org/C113954288","wikidata":"https://www.wikidata.org/wiki/Q186885","display_name":"Timestamp","level":2,"score":0.25209999084472656}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i6.42464","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i6.42464","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i6.42464","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i6.42464","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Multimedia":[0],"documents":[1,70,198],"such":[2],"as":[3,125],"slide":[4,123,197],"presentations":[5],"and":[6,13,30,58,93,127,136,143,150,222],"posters":[7],"are":[8,19],"designed":[9],"to":[10,15,68,75,121,171,204,233],"be":[11],"interactive":[12],"easy":[14],"modify.":[16],"Yet,":[17],"they":[18,73],"often":[20],"distributed":[21],"in":[22,81,146,167,208,228],"a":[23,82,113,147,154,168,190,217],"static":[24],"raster":[25,38,48,148,183],"format,":[26],"which":[27,51],"limits":[28],"editing":[29],"customization.":[31],"Restoring":[32],"their":[33],"editability":[34],"requires":[35],"converting":[36],"these":[37],"images":[39,124],"back":[40],"into":[41,153],"structured":[42],"vector":[43],"formats.":[44],"However,":[45],"existing":[46],"geometric":[47],"vectorization":[49],"methods,":[50],"rely":[52],"on":[53],"low-level":[54],"primitives":[55],"like":[56,71],"curves":[57],"polygons,":[59],"fall":[60],"short":[61],"at":[62],"this":[63,100,209],"task.":[64],"Specifically,":[65],"when":[66],"applied":[67],"complex":[69],"slides,":[72],"fail":[74],"preserve":[76],"the":[77,88,104,138,159,181,234],"high-level":[78],"structure,":[79],"resulting":[80],"flat":[83],"collection":[84],"of":[85,106,196,220,230],"shapes":[86],"where":[87],"semantic":[89,107],"distinction":[90],"between":[91],"image":[92,142],"text":[94,144],"elements":[95,145],"is":[96,223],"lost.":[97],"To":[98],"overcome":[99],"limitation,":[101],"we":[102,187],"address":[103],"problem":[105],"document":[108],"derendering":[109],"by":[110,225],"introducing":[111],"SliDer,":[112],"novel":[114,191],"framework":[115],"that":[116,177,214],"uses":[117],"Vision-Language":[118],"Models":[119],"(VLMs)":[120],"derender":[122],"compact":[126],"editable":[128],"Scalable":[129],"Vector":[130],"Graphic":[131],"(SVG)":[132],"representations.":[133],"SliDer":[134,215],"detects":[135],"extracts":[137],"attributes":[139],"from":[140,200],"individual":[141],"input":[149],"organizes":[151],"them":[152],"coherent":[155],"SVG":[156,175],"format.":[157],"Crucially,":[158],"model":[160],"iteratively":[161],"refines":[162],"its":[163],"predictions":[164],"during":[165],"inference":[166],"process":[169],"analogous":[170],"human":[172,226],"design,":[173],"generating":[174],"code":[176],"more":[178],"faithfully":[179],"reconstructs":[180],"original":[182],"upon":[184],"rendering.":[185],"Furthermore,":[186],"introduce":[188],"Slide2SVG,":[189],"dataset":[192],"comprising":[193],"raster-SVG":[194],"pairs":[195],"curated":[199],"real-world":[201],"scientific":[202],"presentations,":[203],"facilitate":[205],"future":[206],"research":[207],"domain.":[210],"Our":[211],"results":[212],"demonstrate":[213],"achieves":[216],"reconstruction":[218],"LPIPS":[219],"0.069,":[221],"favored":[224],"evaluators":[227],"82.9%":[229],"cases":[231],"compared":[232],"strongest":[235],"zero-shot":[236],"VLM":[237],"baseline.":[238]},"counts_by_year":[],"updated_date":"2026-05-04T08:30:34.212998","created_date":"2026-03-18T00:00:00"}
