{"id":"https://openalex.org/W4415524335","doi":"https://doi.org/10.1109/mlsp62443.2025.11204330","title":"Trust the Model: Compact VLMS as In-Context Judges for Image-Text Data Quality","display_name":"Trust the Model: Compact VLMS as In-Context Judges for Image-Text Data Quality","publication_year":2025,"publication_date":"2025-08-31","ids":{"openalex":"https://openalex.org/W4415524335","doi":"https://doi.org/10.1109/mlsp62443.2025.11204330"},"language":null,"primary_location":{"id":"doi:10.1109/mlsp62443.2025.11204330","is_oa":false,"landing_page_url":"https://doi.org/10.1109/mlsp62443.2025.11204330","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE 35th International Workshop on Machine Learning for Signal Processing (MLSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5114311598","display_name":"Daulet Toibazar","orcid":null},"institutions":[{"id":"https://openalex.org/I4210137075","display_name":"Centre de Recherche en Nutrition Humaine d'Auvergne","ror":"https://ror.org/03rzyjb72","country_code":"FR","type":"facility","lineage":["https://openalex.org/I4210088668","https://openalex.org/I4210137075"]}],"countries":["FR"],"is_corresponding":true,"raw_author_name":"Daulet Toibazar","raw_affiliation_strings":["Humain,Riyadh,KSA"],"affiliations":[{"raw_affiliation_string":"Humain,Riyadh,KSA","institution_ids":["https://openalex.org/I4210137075"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024001271","display_name":"Kesen Wang","orcid":"https://orcid.org/0000-0002-8820-1629"},"institutions":[{"id":"https://openalex.org/I4210137075","display_name":"Centre de Recherche en Nutrition Humaine d'Auvergne","ror":"https://ror.org/03rzyjb72","country_code":"FR","type":"facility","lineage":["https://openalex.org/I4210088668","https://openalex.org/I4210137075"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Kesen Wang","raw_affiliation_strings":["Humain,Riyadh,KSA"],"affiliations":[{"raw_affiliation_string":"Humain,Riyadh,KSA","institution_ids":["https://openalex.org/I4210137075"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101793764","display_name":"Sherif R. Mohamed","orcid":"https://orcid.org/0000-0002-5846-6060"},"institutions":[{"id":"https://openalex.org/I4210137075","display_name":"Centre de Recherche en Nutrition Humaine d'Auvergne","ror":"https://ror.org/03rzyjb72","country_code":"FR","type":"facility","lineage":["https://openalex.org/I4210088668","https://openalex.org/I4210137075"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Sherif Mohamed","raw_affiliation_strings":["Humain,Riyadh,KSA"],"affiliations":[{"raw_affiliation_string":"Humain,Riyadh,KSA","institution_ids":["https://openalex.org/I4210137075"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010794784","display_name":"Abdulaziz Al-Badawi","orcid":null},"institutions":[{"id":"https://openalex.org/I4210137075","display_name":"Centre de Recherche en Nutrition Humaine d'Auvergne","ror":"https://ror.org/03rzyjb72","country_code":"FR","type":"facility","lineage":["https://openalex.org/I4210088668","https://openalex.org/I4210137075"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Abdulaziz Al-Badawi","raw_affiliation_strings":["Humain,Riyadh,KSA"],"affiliations":[{"raw_affiliation_string":"Humain,Riyadh,KSA","institution_ids":["https://openalex.org/I4210137075"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120126669","display_name":"Abdulrahman Alfulayt","orcid":null},"institutions":[{"id":"https://openalex.org/I4210137075","display_name":"Centre de Recherche en Nutrition Humaine d'Auvergne","ror":"https://ror.org/03rzyjb72","country_code":"FR","type":"facility","lineage":["https://openalex.org/I4210088668","https://openalex.org/I4210137075"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Abdulrahman Alfulayt","raw_affiliation_strings":["Humain,Riyadh,KSA"],"affiliations":[{"raw_affiliation_string":"Humain,Riyadh,KSA","institution_ids":["https://openalex.org/I4210137075"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5103874391","display_name":"Pedro J. Moreno","orcid":null},"institutions":[{"id":"https://openalex.org/I4210137075","display_name":"Centre de Recherche en Nutrition Humaine d'Auvergne","ror":"https://ror.org/03rzyjb72","country_code":"FR","type":"facility","lineage":["https://openalex.org/I4210088668","https://openalex.org/I4210137075"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Pedro J. Moreno","raw_affiliation_strings":["Humain,Riyadh,KSA"],"affiliations":[{"raw_affiliation_string":"Humain,Riyadh,KSA","institution_ids":["https://openalex.org/I4210137075"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5114311598"],"corresponding_institution_ids":["https://openalex.org/I4210137075"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.40081182,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12983","display_name":"Satellite Image Processing and Photogrammetry","score":0.7822999954223633,"subfield":{"id":"https://openalex.org/subfields/2212","display_name":"Ocean Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12983","display_name":"Satellite Image Processing and Photogrammetry","score":0.7822999954223633,"subfield":{"id":"https://openalex.org/subfields/2212","display_name":"Ocean Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11164","display_name":"Remote Sensing and LiDAR Applications","score":0.7555000185966492,"subfield":{"id":"https://openalex.org/subfields/2305","display_name":"Environmental Engineering"},"field":{"id":"https://openalex.org/fields/23","display_name":"Environmental Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10689","display_name":"Remote-Sensing Image Classification","score":0.7232999801635742,"subfield":{"id":"https://openalex.org/subfields/2214","display_name":"Media Technology"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/filtration","display_name":"Filtration (mathematics)","score":0.593500018119812},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.5590999722480774},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.5547000169754028},{"id":"https://openalex.org/keywords/filter","display_name":"Filter (signal processing)","score":0.5087000131607056},{"id":"https://openalex.org/keywords/data-modeling","display_name":"Data modeling","score":0.4334999918937683},{"id":"https://openalex.org/keywords/data-quality","display_name":"Data quality","score":0.42899999022483826},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.3779999911785126}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7229999899864197},{"id":"https://openalex.org/C128489963","wikidata":"https://www.wikidata.org/wiki/Q1187724","display_name":"Filtration (mathematics)","level":2,"score":0.593500018119812},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.5590999722480774},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.5547000169754028},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.5087000131607056},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.47429999709129333},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.4334999918937683},{"id":"https://openalex.org/C24756922","wikidata":"https://www.wikidata.org/wiki/Q1757694","display_name":"Data quality","level":3,"score":0.42899999022483826},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.39329999685287476},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.3779999911785126},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3479999899864197},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.3292999863624573},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.32280001044273376},{"id":"https://openalex.org/C2776502983","wikidata":"https://www.wikidata.org/wiki/Q690182","display_name":"Contrast (vision)","level":2,"score":0.3208000063896179},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.31470000743865967},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.27880001068115234},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.27639999985694885},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.27549999952316284},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.25949999690055847},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.2549000084400177},{"id":"https://openalex.org/C118643609","wikidata":"https://www.wikidata.org/wiki/Q189210","display_name":"Web application","level":2,"score":0.25290000438690186}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/mlsp62443.2025.11204330","is_oa":false,"landing_page_url":"https://doi.org/10.1109/mlsp62443.2025.11204330","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE 35th International Workshop on Machine Learning for Signal Processing (MLSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":7,"referenced_works":["https://openalex.org/W2886641317","https://openalex.org/W3003257820","https://openalex.org/W3176641147","https://openalex.org/W4287854593","https://openalex.org/W4402727669","https://openalex.org/W4404781545","https://openalex.org/W4407667962"],"related_works":[],"abstract_inverted_index":{"Vision-language":[0],"models":[1,8],"(VLMs)":[2],"extend":[3],"the":[4,20,56,119,131],"conventional":[5],"large":[6],"language":[7],"by":[9,61],"integrating":[10],"visual":[11,27],"data,":[12,149,209],"enabling":[13],"richer":[14],"multimodal":[15],"reasoning":[16],"and":[17,44,87,95,98,136,153,177,201,212],"significantly":[18],"broadens":[19],"practical":[21],"applications":[22],"of":[23,58,111,123],"AI.":[24],"However,":[25],"including":[26],"inputs":[28],"also":[29],"brings":[30],"new":[31],"challenges":[32],"in":[33],"maintaining":[34],"data":[35,68,214],"quality.":[36],"Empirical":[37],"evidence":[38],"consistently":[39],"shows":[40],"that":[41,71,160],"carefully":[42],"curated":[43],"representative":[45],"training":[46,90,138,198,208],"examples":[47],"often":[48],"yield":[49],"superior":[50],"results":[51,158],"compared":[52],"to":[53],"simply":[54],"increasing":[55],"quantity":[57],"data.":[59],"Inspired":[60],"this":[62],"observation,":[63],"we":[64],"introduce":[65],"a":[66,73,78,124,189],"streamlined":[67],"filtration":[69,107,164,206],"framework":[70],"employs":[72],"compact":[74,167,204],"VLM,":[75],"fine-tuned":[76],"on":[77,93,109,170],"high-quality":[79,196],"image-caption":[80],"annotated":[81],"dataset.":[82],"This":[83,128],"model":[84,142],"effectively":[85],"evaluates":[86],"filters":[88,144],"potential":[89],"samples":[91],"based":[92],"caption":[94,154],"image":[96],"quality":[97],"alignment.":[99],"Unlike":[100],"previous":[101],"approaches,":[102],"which":[103],"typically":[104],"add":[105],"auxiliary":[106],"modules":[108,135],"top":[110],"existing":[112],"full-scale":[113],"VLMs,":[114],"our":[115,166,186],"method":[116,187],"exclusively":[117],"utilizes":[118],"inherent":[120],"evaluative":[121],"capability":[122],"purpose-built":[125],"small":[126],"VLM.":[127],"strategy":[129],"eliminates":[130],"need":[132],"for":[133,194],"extra":[134],"reduces":[137],"overhead.":[139],"Our":[140,203],"lightweight":[141,190],"efficiently":[143],"out":[145],"inaccurate,":[146],"noisy":[147],"web":[148,183],"improving":[150],"image-text":[151],"alignment":[152],"linguistic":[155],"fluency.":[156],"Experimental":[157],"show":[159],"datasets":[161,179],"underwent":[162],"high-precision":[163],"using":[165],"VLM":[168,205],"perform":[169],"par":[171],"with,":[172],"or":[173],"even":[174],"surpass,":[175],"larger":[176],"noisier":[178],"gathered":[180],"through":[181],"high-volume":[182],"crawling.":[184],"Thus,":[185],"provides":[188],"yet":[191],"robust":[192],"solution":[193],"building":[195],"vision-language":[197],"corpora.":[199],"Availability":[200],"implementation:":[202],"model,":[207],"utility":[210],"scripts,":[211],"Supplementary":[213],"(Appendices)":[215],"are":[216],"freely":[217],"available":[218],"at":[219],"https://github.com/daulettoibazar/Compact_VLM_Filter.":[220]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-24T00:00:00"}
