{"id":"https://openalex.org/W7123487007","doi":"https://doi.org/10.48550/arxiv.2601.07581","title":"BenchSeg: A Large-Scale Dataset and Benchmark for Multi-View Food Video Segmentation","display_name":"BenchSeg: A Large-Scale Dataset and Benchmark for Multi-View Food Video Segmentation","publication_year":2026,"publication_date":"2026-01-12","ids":{"openalex":"https://openalex.org/W7123487007","doi":"https://doi.org/10.48550/arxiv.2601.07581"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2601.07581","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.07581","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2601.07581","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5055293235","display_name":"Ahmad AlMughrabi","orcid":"https://orcid.org/0000-0002-9336-3200"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"AlMughrabi, Ahmad","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122946202","display_name":"Guillermo Rivo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rivo, Guillermo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122983442","display_name":"Carlos Jim\u00e9nez-Farf\u00e1n","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jim\u00e9nez-Farf\u00e1n, Carlos","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042536975","display_name":"Umair Haroon","orcid":"https://orcid.org/0000-0002-1449-1838"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Haroon, Umair","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062149542","display_name":"Farid Al-Areqi","orcid":"https://orcid.org/0000-0001-7607-993X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Al-Areqi, Farid","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122933921","display_name":"Hyunjun Jung","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jung, Hyunjun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107007924","display_name":"Busam Benjamin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Busam, Benjamin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087847529","display_name":"Ricardo Fagundes Marques","orcid":"https://orcid.org/0000-0002-1554-7223"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Marques, Ricardo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5122969534","display_name":"Petia Radeva","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Radeva, Petia","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5055293235"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10866","display_name":"Nutritional Studies and Diet","score":0.9484999775886536,"subfield":{"id":"https://openalex.org/subfields/2739","display_name":"Public Health, Environmental and Occupational Health"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T10866","display_name":"Nutritional Studies and Diet","score":0.9484999775886536,"subfield":{"id":"https://openalex.org/subfields/2739","display_name":"Public Health, Environmental and Occupational Health"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T10616","display_name":"Smart Agriculture and AI","score":0.008299999870359898,"subfield":{"id":"https://openalex.org/subfields/1110","display_name":"Plant Science"},"field":{"id":"https://openalex.org/fields/11","display_name":"Agricultural and Biological Sciences"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11259","display_name":"Agriculture Sustainability and Environmental Impact","score":0.005400000140070915,"subfield":{"id":"https://openalex.org/subfields/2303","display_name":"Ecology"},"field":{"id":"https://openalex.org/fields/23","display_name":"Environmental Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.7767000198364258},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6608999967575073},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.5458999872207642},{"id":"https://openalex.org/keywords/image-segmentation","display_name":"Image segmentation","score":0.483599990606308},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4359000027179718},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.43059998750686646},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.4140999913215637},{"id":"https://openalex.org/keywords/scale-space-segmentation","display_name":"Scale-space segmentation","score":0.37880000472068787}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7807999849319458},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.7767000198364258},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6608999967575073},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6578999757766724},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.5458999872207642},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.483599990606308},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4359000027179718},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.43059998750686646},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.4140999913215637},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3799999952316284},{"id":"https://openalex.org/C65885262","wikidata":"https://www.wikidata.org/wiki/Q7429708","display_name":"Scale-space segmentation","level":4,"score":0.37880000472068787},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.36890000104904175},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.3564999997615814},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.33640000224113464},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3305000066757202},{"id":"https://openalex.org/C25694479","wikidata":"https://www.wikidata.org/wiki/Q7446278","display_name":"Segmentation-based object categorization","level":5,"score":0.3086000084877014},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.30649998784065247},{"id":"https://openalex.org/C20556612","wikidata":"https://www.wikidata.org/wiki/Q4469374","display_name":"Volume (thermodynamics)","level":2,"score":0.2928999960422516},{"id":"https://openalex.org/C2780719617","wikidata":"https://www.wikidata.org/wiki/Q1030752","display_name":"Salient","level":2,"score":0.28459998965263367},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.27230000495910645},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.2632000148296356},{"id":"https://openalex.org/C2775936607","wikidata":"https://www.wikidata.org/wiki/Q466845","display_name":"Tracking (education)","level":2,"score":0.2565000057220459}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2601.07581","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.07581","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2601.07581","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.07581","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.6311673521995544,"id":"https://metadata.un.org/sdg/2","display_name":"Zero hunger"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Food":[0],"image":[1,111],"segmentation":[2,40,78,149,171,214],"is":[3],"a":[4,35,72,130,163],"critical":[5],"task":[6],"for":[7,152],"dietary":[8,153],"analysis,":[9],"enabling":[10],"accurate":[11],"estimation":[12],"of":[13,75,132],"food":[14,38,148,213],"volume":[15],"and":[16,26,42,55,84,92,96,104,150,179,211],"nutrients.":[17],"However,":[18],"current":[19],"methods":[20,119],"suffer":[21],"from":[22],"limited":[23],"multi-view":[24,37],"data":[25],"poor":[27],"generalization":[28],"to":[29,157,186,200],"new":[30,145],"viewpoints.":[31],"We":[32,70,197],"introduce":[33,162],"BenchSeg,":[34],"novel":[36,116],"video":[39],"dataset":[41,91,209],"benchmark.":[43],"BenchSeg":[44,199],"aggregates":[45],"55":[46],"dish":[47,64],"scenes":[48],"(from":[49],"Nutrition5k,":[50],"Vegetables":[51],"&amp;":[52],"Fruits,":[53],"MetaFood3D,":[54],"FoodKit)":[56],"with":[57,98],"25,284":[58],"meticulously":[59],"annotated":[60],"frames,":[61],"capturing":[62],"each":[63],"under":[65,115,193],"free":[66],"360\u00b0":[67],"camera":[68],"motion.":[69],"evaluate":[71,93],"diverse":[73],"set":[74],"20":[76],"state-of-the-art":[77],"models":[79,215],"(e.g.,":[80,137],"SAM-based,":[81],"transformer,":[82],"CNN,":[83],"large":[85],"multimodal)":[86],"on":[87,101,129],"the":[88,208,212],"existing":[89],"FoodSeg103":[90],"them":[94],"(alone":[95],"combined":[97],"video-memory":[99],"modules)":[100],"BenchSeg.":[102],"Quantitative":[103],"qualitative":[105],"results":[106],"demonstrate":[107],"that":[108,168,190],"while":[109],"standard":[110,194],"segmenters":[112],"degrade":[113],"sharply":[114],"viewpoints,":[117],"memory-augmented":[118],"maintain":[120],"temporal":[121,165],"consistency":[122],"across":[123],"frames.":[124],"Our":[125],"best":[126],"model":[127],"based":[128],"combination":[131],"SeTR-MLA+XMem2":[133],"outperforms":[134],"prior":[135],"work":[136],"improving":[138],"over":[139,173],"FoodMem":[140],"by":[141],"~2.63%":[142],"mAP),":[143],"offering":[144],"insights":[146],"into":[147],"tracking":[151],"analysis.":[154],"In":[155],"addition":[156],"frame-wise":[158],"spatial":[159],"accuracy,":[160],"we":[161],"dedicated":[164],"evaluation":[166],"protocol":[167],"explicitly":[169],"quantifies":[170],"stability":[172],"time":[174],"through":[175],"continuity,":[176],"flicker":[177],"rate,":[178],"IoU":[180],"drift":[181],"metrics.":[182],"This":[183],"allows":[184],"us":[185],"reveal":[187],"failure":[188],"modes":[189],"remain":[191],"invisible":[192],"per-frame":[195],"evaluations.":[196],"release":[198],"foster":[201],"future":[202],"research.":[203],"The":[204],"project":[205],"page":[206],"including":[207],"annotations":[210],"can":[216],"be":[217],"found":[218],"at":[219],"https://amughrabi.github.io/benchseg.":[220]},"counts_by_year":[],"updated_date":"2026-01-22T23:29:09.771500","created_date":"2026-01-14T00:00:00"}
