{"id":"https://openalex.org/W4414981059","doi":"https://doi.org/10.48550/arxiv.2510.06071","title":"Benchmark It Yourself (BIY): Preparing a Dataset and Benchmarking AI Models for Scatterplot-Related Tasks","display_name":"Benchmark It Yourself (BIY): Preparing a Dataset and Benchmarking AI Models for Scatterplot-Related Tasks","publication_year":2025,"publication_date":"2025-10-07","ids":{"openalex":"https://openalex.org/W4414981059","doi":"https://doi.org/10.48550/arxiv.2510.06071"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2510.06071","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2510.06071","pdf_url":"https://arxiv.org/pdf/2510.06071","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2510.06071","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5041821438","display_name":"Jo\u00e3o Palmeiro","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Palmeiro, Jo\u00e3o","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101647087","display_name":"Diogo Duarte","orcid":"https://orcid.org/0000-0001-9658-1118"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Duarte, Diogo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101645392","display_name":"Rita Costa","orcid":"https://orcid.org/0000-0001-9003-1350"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Costa, Rita","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5077752651","display_name":"Pedro Bizarro","orcid":"https://orcid.org/0000-0001-5281-1970"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bizarro, Pedro","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5041821438"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.40049999952316284,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.40049999952316284,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.8203999996185303},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.7285000085830688},{"id":"https://openalex.org/keywords/outlier","display_name":"Outlier","score":0.6653000116348267},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.6165000200271606},{"id":"https://openalex.org/keywords/bounding-overwatch","display_name":"Bounding overwatch","score":0.5622000098228455},{"id":"https://openalex.org/keywords/chart","display_name":"Chart","score":0.4943999946117401},{"id":"https://openalex.org/keywords/limiting","display_name":"Limiting","score":0.400299996137619},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.3711000084877014}],"concepts":[{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.8203999996185303},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8083999752998352},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.7285000085830688},{"id":"https://openalex.org/C79337645","wikidata":"https://www.wikidata.org/wiki/Q779824","display_name":"Outlier","level":2,"score":0.6653000116348267},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.6165000200271606},{"id":"https://openalex.org/C63584917","wikidata":"https://www.wikidata.org/wiki/Q333286","display_name":"Bounding overwatch","level":2,"score":0.5622000098228455},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5523999929428101},{"id":"https://openalex.org/C190812933","wikidata":"https://www.wikidata.org/wiki/Q28923","display_name":"Chart","level":2,"score":0.4943999946117401},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4424000084400177},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4320000112056732},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.400299996137619},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.3711000084877014},{"id":"https://openalex.org/C739882","wikidata":"https://www.wikidata.org/wiki/Q3560506","display_name":"Anomaly detection","level":2,"score":0.33980000019073486},{"id":"https://openalex.org/C184898388","wikidata":"https://www.wikidata.org/wiki/Q1435712","display_name":"Pairwise comparison","level":2,"score":0.33899998664855957},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.3249000012874603},{"id":"https://openalex.org/C64869954","wikidata":"https://www.wikidata.org/wiki/Q1859747","display_name":"False positive paradox","level":2,"score":0.2980000078678131},{"id":"https://openalex.org/C81669768","wikidata":"https://www.wikidata.org/wiki/Q2359161","display_name":"Precision and recall","level":2,"score":0.2978000044822693},{"id":"https://openalex.org/C63085389","wikidata":"https://www.wikidata.org/wiki/Q4287912","display_name":"Medoid","level":3,"score":0.29760000109672546},{"id":"https://openalex.org/C164866538","wikidata":"https://www.wikidata.org/wiki/Q367351","display_name":"Cluster (spacecraft)","level":2,"score":0.28110000491142273},{"id":"https://openalex.org/C132964779","wikidata":"https://www.wikidata.org/wiki/Q2110223","display_name":"Raw data","level":2,"score":0.2777000069618225},{"id":"https://openalex.org/C63882131","wikidata":"https://www.wikidata.org/wiki/Q17122954","display_name":"Strengths and weaknesses","level":2,"score":0.2705000042915344}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2510.06071","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2510.06071","pdf_url":"https://arxiv.org/pdf/2510.06071","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2510.06071","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2510.06071","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2510.06071","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2510.06071","pdf_url":"https://arxiv.org/pdf/2510.06071","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4414981059.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"AI":[0],"models":[1,59,85],"are":[2,95,114,119,165],"increasingly":[3],"used":[4],"for":[5,24,98,111,125],"data":[6,44],"analysis":[7],"and":[8,46,50,62,81,86,117,157],"visualization,":[9],"yet":[10],"benchmarks":[11],"rarely":[12],"address":[13,21],"scatterplot-specific":[14],"tasks,":[15],"limiting":[16],"insight":[17],"into":[18],"performance.":[19],"To":[20],"this":[22],"gap":[23],"one":[25],"of":[26,38,74,134],"the":[27,109,132],"most":[28],"common":[29],"chart":[30,48,135],"types,":[31],"we":[32],"introduce":[33],"a":[34,51,142],"synthetic,":[35],"annotated":[36],"dataset":[37],"over":[39],"18,000":[40],"scatterplots":[41,151],"from":[42,60,72],"six":[43],"generators":[45],"17":[47],"designs,":[49],"benchmark":[52],"based":[53],"on":[54,67,137],"it.":[55],"We":[56],"evaluate":[57],"proprietary":[58],"OpenAI":[61,84],"Google":[63],"using":[64],"N-shot":[65],"prompting":[66],"five":[68],"distinct":[69],"tasks":[70,113],"derived":[71],"annotations":[73],"cluster":[75],"bounding":[76],"boxes,":[77],"their":[78],"center":[79],"coordinates,":[80],"outlier":[82,128],"coordinates.":[83],"Gemini":[87],"2.5":[88],"Flash,":[89],"especially":[90],"when":[91],"prompted":[92],"with":[93,152],"examples,":[94],"viable":[96],"options":[97],"counting":[99],"clusters":[100],"and,":[101],"in":[102,127],"Flash's":[103],"case,":[104],"outliers":[105],"(90%+":[106],"Accuracy).":[107],"However,":[108],"results":[110],"localization-related":[112],"unsatisfactory:":[115],"Precision":[116],"Recall":[118],"near":[120],"or":[121,159],"below":[122],"50%,":[123],"except":[124],"Flash":[126],"identification":[129],"(65.01%).":[130],"Furthermore,":[131],"impact":[133],"design":[136],"performance":[138],"appears":[139],"to":[140,149],"be":[141],"secondary":[143],"factor,":[144],"but":[145],"it":[146],"is":[147],"advisable":[148],"avoid":[150],"wide":[153],"aspect":[154],"ratios":[155],"(16:9":[156],"21:9)":[158],"those":[160],"colored":[161],"randomly.":[162],"Supplementary":[163],"materials":[164],"available":[166],"at":[167],"https://github.com/feedzai/biy-paper.":[168]},"counts_by_year":[],"updated_date":"2026-03-11T14:59:36.786465","created_date":"2025-10-10T00:00:00"}
