{"id":"https://openalex.org/W4293248177","doi":"https://doi.org/10.1145/3539813.3545150","title":"WooIR","display_name":"WooIR","publication_year":2022,"publication_date":"2022-08-23","ids":{"openalex":"https://openalex.org/W4293248177","doi":"https://doi.org/10.1145/3539813.3545150"},"language":"en","primary_location":{"id":"doi:10.1145/3539813.3545150","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3539813.3545150","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2022 ACM SIGIR International Conference on Theory of Information Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://pure.uva.nl/ws/files/137530237/3539813.3545150.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5033059967","display_name":"Ruben van Heusden","orcid":"https://orcid.org/0000-0001-9204-9220"},"institutions":[{"id":"https://openalex.org/I4210135670","display_name":"Amsterdam University of the Arts","ror":"https://ror.org/04dde1554","country_code":"NL","type":"education","lineage":["https://openalex.org/I4210135670"]},{"id":"https://openalex.org/I887064364","display_name":"University of Amsterdam","ror":"https://ror.org/04dkp9463","country_code":"NL","type":"education","lineage":["https://openalex.org/I887064364"]}],"countries":["NL"],"is_corresponding":true,"raw_author_name":"Ruben van Heusden","raw_affiliation_strings":["University of Amsterdam, Amsterdam, Netherlands"],"affiliations":[{"raw_affiliation_string":"University of Amsterdam, Amsterdam, Netherlands","institution_ids":["https://openalex.org/I4210135670","https://openalex.org/I887064364"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044511901","display_name":"Jaap Kamps","orcid":"https://orcid.org/0000-0002-6614-0087"},"institutions":[{"id":"https://openalex.org/I887064364","display_name":"University of Amsterdam","ror":"https://ror.org/04dkp9463","country_code":"NL","type":"education","lineage":["https://openalex.org/I887064364"]},{"id":"https://openalex.org/I4210135670","display_name":"Amsterdam University of the Arts","ror":"https://ror.org/04dde1554","country_code":"NL","type":"education","lineage":["https://openalex.org/I4210135670"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Jaap Kamps","raw_affiliation_strings":["University of Amsterdam, Amsterdam, Netherlands"],"affiliations":[{"raw_affiliation_string":"University of Amsterdam, Amsterdam, Netherlands","institution_ids":["https://openalex.org/I4210135670","https://openalex.org/I887064364"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5049970479","display_name":"M. Marx","orcid":"https://orcid.org/0000-0003-3255-3729"},"institutions":[{"id":"https://openalex.org/I4210135670","display_name":"Amsterdam University of the Arts","ror":"https://ror.org/04dde1554","country_code":"NL","type":"education","lineage":["https://openalex.org/I4210135670"]},{"id":"https://openalex.org/I887064364","display_name":"University of Amsterdam","ror":"https://ror.org/04dkp9463","country_code":"NL","type":"education","lineage":["https://openalex.org/I887064364"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Maarten Marx","raw_affiliation_strings":["University of Amsterdam, Amsterdam, Netherlands"],"affiliations":[{"raw_affiliation_string":"University of Amsterdam, Amsterdam, Netherlands","institution_ids":["https://openalex.org/I4210135670","https://openalex.org/I887064364"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5033059967"],"corresponding_institution_ids":["https://openalex.org/I4210135670","https://openalex.org/I887064364"],"apc_list":null,"apc_paid":null,"fwci":0.2039,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.47903983,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"9443","issue":null,"first_page":"24","last_page":"33"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9944000244140625,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8379195928573608},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.7302966117858887},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.7061094641685486},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.7044504880905151},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.6253237128257751},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.5702023506164551},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.5500319600105286},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5240854620933533},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4632019102573395},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.42928779125213623},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.37268802523612976}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8379195928573608},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.7302966117858887},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.7061094641685486},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.7044504880905151},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.6253237128257751},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.5702023506164551},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.5500319600105286},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5240854620933533},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4632019102573395},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.42928779125213623},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.37268802523612976},{"id":"https://openalex.org/C111368507","wikidata":"https://www.wikidata.org/wiki/Q43518","display_name":"Oceanography","level":1,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/3539813.3545150","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3539813.3545150","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2022 ACM SIGIR International Conference on Theory of Information Retrieval","raw_type":"proceedings-article"},{"id":"pmh:oai:dare.uva.nl:openaire/f615b830-cfd1-481a-b096-918daa2a1cb2","is_oa":true,"landing_page_url":"https://hdl.handle.net/11245.1/f615b830-cfd1-481a-b096-918daa2a1cb2","pdf_url":"https://pure.uva.nl/ws/files/137530237/3539813.3545150.pdf","source":{"id":"https://openalex.org/S4306400088","display_name":"UvA-DARE (University of Amsterdam)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I887064364","host_organization_name":"University of Amsterdam","host_organization_lineage":["https://openalex.org/I887064364"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"van Heusden, R, Kamps, J & Marx, M 2022, WooIR: A New Open Page Stream Segmentation Dataset. in ICTIR'22 : proceedings of the 2022 ACM SIGIR International Conference on Theory of Information Retrieval : July 11-12, 2022, Madrid, Spain. New York, NY, pp. 24-33, 8th ACM SIGIR International Conference on the Theory of Information Retrieval, ICTIR 2022, Virtual, Online, Spain, 11/07/22. https://doi.org/10.1145/3539813.3545150","raw_type":"info:eu-repo/semantics/conferenceObject"},{"id":"pmh:oai:dare.uva.nl:openaire_cris_publications/f615b830-cfd1-481a-b096-918daa2a1cb2","is_oa":false,"landing_page_url":"https://handle.uba.uva.nl/personal/pure/en/publications/wooir-a-new-open-page-stream-segmentation-dataset(f615b830-cfd1-481a-b096-918daa2a1cb2).html","pdf_url":null,"source":{"id":"https://openalex.org/S4306400088","display_name":"UvA-DARE (University of Amsterdam)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I887064364","host_organization_name":"University of Amsterdam","host_organization_lineage":["https://openalex.org/I887064364"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"van Heusden, R, Kamps, J & Marx, M 2022, WooIR: A New Open Page Stream Segmentation Dataset. in ICTIR'22 : proceedings of the 2022 ACM SIGIR International Conference on Theory of Information Retrieval : July 11-12, 2022, Madrid, Spain. New York, NY, pp. 24-33, 8th ACM SIGIR International Conference on the Theory of Information Retrieval, ICTIR 2022, Virtual, Online, Spain, 11/07/22. https://doi.org/10.1145/3539813.3545150","raw_type":"info:eu-repo/semantics/conferenceObject"}],"best_oa_location":{"id":"pmh:oai:dare.uva.nl:openaire/f615b830-cfd1-481a-b096-918daa2a1cb2","is_oa":true,"landing_page_url":"https://hdl.handle.net/11245.1/f615b830-cfd1-481a-b096-918daa2a1cb2","pdf_url":"https://pure.uva.nl/ws/files/137530237/3539813.3545150.pdf","source":{"id":"https://openalex.org/S4306400088","display_name":"UvA-DARE (University of Amsterdam)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I887064364","host_organization_name":"University of Amsterdam","host_organization_lineage":["https://openalex.org/I887064364"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"van Heusden, R, Kamps, J & Marx, M 2022, WooIR: A New Open Page Stream Segmentation Dataset. in ICTIR'22 : proceedings of the 2022 ACM SIGIR International Conference on Theory of Information Retrieval : July 11-12, 2022, Madrid, Spain. New York, NY, pp. 24-33, 8th ACM SIGIR International Conference on the Theory of Information Retrieval, ICTIR 2022, Virtual, Online, Spain, 11/07/22. https://doi.org/10.1145/3539813.3545150","raw_type":"info:eu-repo/semantics/conferenceObject"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.8100000023841858}],"awards":[{"id":"https://openalex.org/G4723783796","display_name":null,"funder_award_id":"CISC.CC.016","funder_id":"https://openalex.org/F4320321800","funder_display_name":"Nederlandse Organisatie voor Wetenschappelijk Onderzoek"}],"funders":[{"id":"https://openalex.org/F4320313716","display_name":"Canadian Institute of Steel Construction","ror":"https://ror.org/04w9bz196"},{"id":"https://openalex.org/F4320321800","display_name":"Nederlandse Organisatie voor Wetenschappelijk Onderzoek","ror":"https://ror.org/04jsz6e67"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4293248177.pdf","grobid_xml":"https://content.openalex.org/works/W4293248177.grobid-xml"},"referenced_works_count":32,"referenced_works":["https://openalex.org/W94155879","https://openalex.org/W1568806324","https://openalex.org/W1581204992","https://openalex.org/W1966382373","https://openalex.org/W1969054840","https://openalex.org/W1969982791","https://openalex.org/W1977813952","https://openalex.org/W1979469248","https://openalex.org/W2014378927","https://openalex.org/W2025620159","https://openalex.org/W2034435383","https://openalex.org/W2044899606","https://openalex.org/W2066792529","https://openalex.org/W2083122709","https://openalex.org/W2106918957","https://openalex.org/W2113227740","https://openalex.org/W2139042198","https://openalex.org/W2159083595","https://openalex.org/W2425100928","https://openalex.org/W2563209596","https://openalex.org/W2798826627","https://openalex.org/W2811036830","https://openalex.org/W2999219213","https://openalex.org/W3011629316","https://openalex.org/W3034327408","https://openalex.org/W3094448412","https://openalex.org/W3151205171","https://openalex.org/W3191530498","https://openalex.org/W4205729528","https://openalex.org/W4229912654","https://openalex.org/W4232110135","https://openalex.org/W4391156274"],"related_works":["https://openalex.org/W4294661698","https://openalex.org/W4319453497","https://openalex.org/W2946668189","https://openalex.org/W4304777330","https://openalex.org/W2758480492","https://openalex.org/W1509467138","https://openalex.org/W4293862731","https://openalex.org/W2798678281","https://openalex.org/W2008106311","https://openalex.org/W2091635963"],"abstract_inverted_index":{"In":[0],"this":[1],"work":[2],"we":[3,68],"presentWooIR,":[4],"an":[5,85],"open":[6],"realistic":[7],"benchmark":[8],"for":[9,80,139],"Page":[10],"Stream":[11],"Segmentation":[12],"(PSS),":[13],"the":[14,47,56,63,66,74,81,89,92,100,104,108,113,129,134,140],"task":[15],"of":[16,22,27,31,58,65,94,103,119],"recovering":[17],"document":[18],"boundaries":[19],"from":[20,43,62,128],"aggregatedstreams":[21],"pages.":[23],"Our":[24],"dataset":[25,67,75,110,114],"consists":[26],"over":[28],"200":[29],"streams":[30],"scanned":[32],"in":[33,50,84,91],"documents,":[34,36],"7K":[35],"45K":[37],"pages":[38],"and":[39,76,121],"10M":[40],"words,":[41],"originating":[42],"documents":[44],"released":[45],"by":[46],"Dutch":[48],"government":[49],"response":[51],"to":[52,87,99],"requests":[53],"made":[54],"under":[55],"Freedom":[57],"Information":[59],"Act.":[60],"Apart":[61],"introduction":[64],"perform":[69],"several":[70],"baseline":[71],"experiments":[72],"on":[73,107],"compare":[77],"six":[78,105],"metrics":[79,96,106],"PSS":[82,141],"task,":[83],"attempt":[86],"unify":[88],"field":[90,132],"usage":[93],"evaluation":[95,137],"more":[97],"suited":[98],"task.":[101,142],"Analysis":[102],"WooIR":[109],"shows":[111],"that":[112],"contains":[115],"a":[116],"good":[117],"balance":[118],"easy":[120],"hard":[122],"samples.":[123],"The":[124],"Panoptic":[125],"Quality":[126],"metric":[127,138],"image":[130],"segmentation":[131],"seems":[133],"most":[135],"appropriate":[136]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2022-08-27T00:00:00"}
