{"id":"https://openalex.org/W4416017191","doi":"https://doi.org/10.1145/3746252.3761621","title":"VideoAVE: A Multi-Attribute Video-to-Text Attribute Value Extraction Dataset and Benchmark Models","display_name":"VideoAVE: A Multi-Attribute Video-to-Text Attribute Value Extraction Dataset and Benchmark Models","publication_year":2025,"publication_date":"2025-11-08","ids":{"openalex":"https://openalex.org/W4416017191","doi":"https://doi.org/10.1145/3746252.3761621"},"language":"en","primary_location":{"id":"doi:10.1145/3746252.3761621","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3746252.3761621","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 34th ACM International Conference on Information and Knowledge Management","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3746252.3761621","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5109701297","display_name":"Ming Cheng","orcid":"https://orcid.org/0009-0006-8475-2331"},"institutions":[{"id":"https://openalex.org/I859038795","display_name":"Virginia Tech","ror":"https://ror.org/02smfhw86","country_code":"US","type":"education","lineage":["https://openalex.org/I859038795"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Ming Cheng","raw_affiliation_strings":["Virginia Tech, Blacksburg, VA, USA"],"affiliations":[{"raw_affiliation_string":"Virginia Tech, Blacksburg, VA, USA","institution_ids":["https://openalex.org/I859038795"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109703862","display_name":"Tong Wu","orcid":"https://orcid.org/0009-0004-8310-6501"},"institutions":[{"id":"https://openalex.org/I859038795","display_name":"Virginia Tech","ror":"https://ror.org/02smfhw86","country_code":"US","type":"education","lineage":["https://openalex.org/I859038795"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tong Wu","raw_affiliation_strings":["Virginia Tech, Blacksburg, VA, USA"],"affiliations":[{"raw_affiliation_string":"Virginia Tech, Blacksburg, VA, USA","institution_ids":["https://openalex.org/I859038795"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113091624","display_name":"Jiazhen Hu","orcid":"https://orcid.org/0009-0007-6950-1910"},"institutions":[{"id":"https://openalex.org/I859038795","display_name":"Virginia Tech","ror":"https://ror.org/02smfhw86","country_code":"US","type":"education","lineage":["https://openalex.org/I859038795"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jiazhen Hu","raw_affiliation_strings":["Virginia Tech, Blacksburg, VA, USA"],"affiliations":[{"raw_affiliation_string":"Virginia Tech, Blacksburg, VA, USA","institution_ids":["https://openalex.org/I859038795"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089203379","display_name":"J. K. Gong","orcid":"https://orcid.org/0000-0001-8945-6909"},"institutions":[{"id":"https://openalex.org/I859038795","display_name":"Virginia Tech","ror":"https://ror.org/02smfhw86","country_code":"US","type":"education","lineage":["https://openalex.org/I859038795"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jiaying Gong","raw_affiliation_strings":["Virginia Tech, Blacksburg, VA, USA"],"affiliations":[{"raw_affiliation_string":"Virginia Tech, Blacksburg, VA, USA","institution_ids":["https://openalex.org/I859038795"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5038305381","display_name":"Hoda Eldardiry","orcid":"https://orcid.org/0000-0002-9712-6667"},"institutions":[{"id":"https://openalex.org/I859038795","display_name":"Virginia Tech","ror":"https://ror.org/02smfhw86","country_code":"US","type":"education","lineage":["https://openalex.org/I859038795"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hoda Eldardiry","raw_affiliation_strings":["Virginia Tech, Blacksburg, VA, USA"],"affiliations":[{"raw_affiliation_string":"Virginia Tech, Blacksburg, VA, USA","institution_ids":["https://openalex.org/I859038795"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5109701297"],"corresponding_institution_ids":["https://openalex.org/I859038795"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.18330172,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"6340","last_page":"6345"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.36070001125335693,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.36070001125335693,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.22689999639987946,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10664","display_name":"Sentiment Analysis and Opinion Mining","score":0.0877000018954277,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.8458999991416931},{"id":"https://openalex.org/keywords/usability","display_name":"Usability","score":0.5982000231742859},{"id":"https://openalex.org/keywords/structuring","display_name":"Structuring","score":0.5196999907493591},{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.4472000002861023},{"id":"https://openalex.org/keywords/product","display_name":"Product (mathematics)","score":0.43970000743865967},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.43650001287460327}],"concepts":[{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.8458999991416931},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7807000279426575},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.6370000243186951},{"id":"https://openalex.org/C170130773","wikidata":"https://www.wikidata.org/wiki/Q216378","display_name":"Usability","level":2,"score":0.5982000231742859},{"id":"https://openalex.org/C2775945657","wikidata":"https://www.wikidata.org/wiki/Q381442","display_name":"Structuring","level":2,"score":0.5196999907493591},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.4472000002861023},{"id":"https://openalex.org/C90673727","wikidata":"https://www.wikidata.org/wiki/Q901718","display_name":"Product (mathematics)","level":2,"score":0.43970000743865967},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.43650001287460327},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4260999858379364},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.424699991941452},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.350600004196167},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.3463999927043915},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.29100000858306885},{"id":"https://openalex.org/C182306322","wikidata":"https://www.wikidata.org/wiki/Q1779371","display_name":"Order (exchange)","level":2,"score":0.2770000100135803},{"id":"https://openalex.org/C45804977","wikidata":"https://www.wikidata.org/wiki/Q7239673","display_name":"Predictive modelling","level":2,"score":0.26170000433921814},{"id":"https://openalex.org/C2777466982","wikidata":"https://www.wikidata.org/wiki/Q5227287","display_name":"Data extraction","level":3,"score":0.2540999948978424}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3746252.3761621","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3746252.3761621","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 34th ACM International Conference on Information and Knowledge Management","raw_type":"proceedings-article"},{"id":"pmh:oai:vtechworks.lib.vt.edu:10919/139807","is_oa":true,"landing_page_url":"https://hdl.handle.net/10919/139807","pdf_url":null,"source":{"id":"https://openalex.org/S4306400248","display_name":"VTechWorks (Virginia Tech)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I859038795","host_organization_name":"Virginia Tech","host_organization_lineage":["https://openalex.org/I859038795"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":null,"raw_type":"Text"}],"best_oa_location":{"id":"doi:10.1145/3746252.3761621","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3746252.3761621","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 34th ACM International Conference on Information and Knowledge Management","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W2805173585","https://openalex.org/W2951865668","https://openalex.org/W4224313570","https://openalex.org/W4224919569","https://openalex.org/W4312614039","https://openalex.org/W4319300823","https://openalex.org/W4321485547","https://openalex.org/W4385571009","https://openalex.org/W4385965955","https://openalex.org/W4400529373","https://openalex.org/W4403780613","https://openalex.org/W4404784276","https://openalex.org/W4411119767","https://openalex.org/W4412886863"],"related_works":[],"abstract_inverted_index":{"Attribute":[0],"Value":[1],"Extraction":[2],"(AVE)":[3],"is":[4,145],"important":[5],"for":[6,26,148,164],"structuring":[7],"product":[8,27],"information":[9],"in":[10,81,140],"e-commerce.":[11],"However,":[12],"existing":[13],"AVE":[14,48,134],"datasets":[15],"are":[16,166],"primarily":[17],"limited":[18],"to":[19,74,95],"text-to-text":[20],"or":[21],"image-to-text":[22],"settings,":[23,142],"lacking":[24],"support":[25],"videos,":[28],"diverse":[29],"attribute":[30],"coverage,":[31],"and":[32,54,89,122,143,161],"public":[33],"availability.":[34],"To":[35,59],"address":[36],"these":[37],"gaps,":[38],"we":[39,63,102],"introduce":[40],"VideoAVE,":[41],"the":[42,76,97,100],"first":[43],"publicly":[44],"available":[45,167],"video-to-text":[46,133],"e-commerce":[47],"dataset":[49,84,160],"across":[50],"14":[51],"different":[52],"domains":[53],"covering":[55],"172":[56],"unique":[57],"attributes.":[58],"ensure":[60],"data":[61,88],"quality,":[62],"propose":[64],"a":[65,82,105,136],"post-hoc":[66],"CLIP-based":[67],"Mixture":[68],"of":[69,85,99,154],"Experts":[70],"filtering":[71],"system":[72],"(CLIP-MoE)":[73],"remove":[75],"mismatched":[77],"video-product":[78],"pairs,":[79],"resulting":[80],"refined":[83],"224k":[86],"training":[87],"25k":[90],"evaluation":[91],"data.":[92],"In":[93],"order":[94],"evaluate":[96],"usability":[98],"dataset,":[101],"further":[103],"establish":[104],"comprehensive":[106],"benchmark":[107,162],"by":[108],"evaluating":[109],"several":[110],"state-of-the-art":[111],"video":[112],"vision":[113],"language":[114],"models":[115],"(VLMs)":[116],"under":[117],"both":[118],"attribute-conditioned":[119],"value":[120],"prediction":[121],"open":[123,141],"attribute-value":[124],"pair":[125],"extraction":[126],"tasks.":[127],"Our":[128],"results":[129],"analysis":[130],"reveals":[131],"that":[132],"remains":[135],"challenging":[137],"problem,":[138],"particularly":[139],"there":[144],"still":[146],"room":[147],"developing":[149],"more":[150],"advanced":[151],"VLMs":[152],"capable":[153],"leveraging":[155],"effective":[156],"temporal":[157],"information.":[158],"The":[159],"code":[163],"VideoAVE":[165],"at:":[168],"https://github.com/gjiaying/VideoAVE.":[169]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-11-08T00:00:00"}
