{"id":"https://openalex.org/W4416515907","doi":"https://doi.org/10.48550/arxiv.2506.23465","title":"Sanitizing Manufacturing Dataset Labels Using Vision-Language Models","display_name":"Sanitizing Manufacturing Dataset Labels Using Vision-Language Models","publication_year":2025,"publication_date":"2025-06-30","ids":{"openalex":"https://openalex.org/W4416515907","doi":"https://doi.org/10.48550/arxiv.2506.23465"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2506.23465","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2506.23465","pdf_url":"https://arxiv.org/pdf/2506.23465","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite","doaj"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2506.23465","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5098767835","display_name":"Nazanin Mahjourian","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mahjourian, Nazanin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Nguyen, Vinh","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nguyen, Vinh","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.859000027179718,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.859000027179718,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.05510000139474869,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.009100000374019146,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.5936999917030334},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5658000111579895},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.5529000163078308},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.5331000089645386},{"id":"https://openalex.org/keywords/cosine-similarity","display_name":"Cosine similarity","score":0.5033000111579895},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.45730000734329224},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.4449000060558319},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.39399999380111694},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.38670000433921814}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.742900013923645},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.609499990940094},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.5936999917030334},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5658000111579895},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.5529000163078308},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.5331000089645386},{"id":"https://openalex.org/C2780762811","wikidata":"https://www.wikidata.org/wiki/Q1784941","display_name":"Cosine similarity","level":3,"score":0.5033000111579895},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4731999933719635},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.45730000734329224},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.4449000060558319},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.39399999380111694},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.38670000433921814},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.36649999022483826},{"id":"https://openalex.org/C2779982483","wikidata":"https://www.wikidata.org/wiki/Q6094420","display_name":"Iterative refinement","level":2,"score":0.34040001034736633},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3400999903678894},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3395000100135803},{"id":"https://openalex.org/C130318100","wikidata":"https://www.wikidata.org/wiki/Q2268914","display_name":"Semantic similarity","level":2,"score":0.33799999952316284},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.31619998812675476},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.3034999966621399},{"id":"https://openalex.org/C55020928","wikidata":"https://www.wikidata.org/wiki/Q3813865","display_name":"Image quality","level":3,"score":0.29089999198913574},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.28870001435279846},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.27399998903274536},{"id":"https://openalex.org/C143587482","wikidata":"https://www.wikidata.org/wiki/Q1543216","display_name":"Iterative and incremental development","level":2,"score":0.26510000228881836},{"id":"https://openalex.org/C39235581","wikidata":"https://www.wikidata.org/wiki/Q5158434","display_name":"Conceptual clustering","level":5,"score":0.2581999897956848},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.2581999897956848},{"id":"https://openalex.org/C77967617","wikidata":"https://www.wikidata.org/wiki/Q4677561","display_name":"Active learning (machine learning)","level":2,"score":0.25270000100135803}],"mesh":[],"locations_count":4,"locations":[{"id":"pmh:oai:arXiv.org:2506.23465","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2506.23465","pdf_url":"https://arxiv.org/pdf/2506.23465","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:digitalcommons.mtu.edu:michigantech-p2-3549","is_oa":true,"landing_page_url":"https://digitalcommons.mtu.edu/michigantech-p2/2487","pdf_url":null,"source":{"id":"https://openalex.org/S4377196391","display_name":"Digital Commons - Michigan Tech (Michigan Technological University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I11957088","host_organization_name":"Michigan Technological University","host_organization_lineage":["https://openalex.org/I11957088"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Michigan Tech Publications","raw_type":"text"},{"id":"pmh:oai:doaj.org/article:ac1dba772ec64c59bdf4c6d72d0545cc","is_oa":true,"landing_page_url":"https://doaj.org/article/ac1dba772ec64c59bdf4c6d72d0545cc","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Machine Learning with Applications, Vol 24, Iss , Pp 100893- (2026)","raw_type":"article"},{"id":"doi:10.48550/arxiv.2506.23465","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2506.23465","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2506.23465","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2506.23465","pdf_url":"https://arxiv.org/pdf/2506.23465","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0,174],"success":[1],"of":[2,15,194],"machine":[3,233],"learning":[4,234],"models":[5,235],"in":[6,46,75,106,219,236],"industrial":[7,237],"applications":[8,238],"is":[9,43,53,66,119,188],"heavily":[10],"dependent":[11],"on":[12,157],"the":[13,16,21,96,111,131,152,192,195,202,227],"quality":[14,229],"datasets":[17],"used":[18],"to":[19,121,165,190],"train":[20],"models.":[22],"However,":[23],"large-scale":[24],"datasets,":[25],"specially":[26],"those":[27],"constructed":[28],"from":[29,35,181],"crowd-sourcing":[30],"and":[31,39,55,62,73,85,129,148,185,209],"web-scraping,":[32],"often":[33],"suffer":[34],"label":[36,71,117,135,149,172,211,220],"noise,":[37],"inconsistencies,":[38],"errors.":[40],"This":[41,57,80,213],"problem":[42],"particularly":[44],"pronounced":[45],"manufacturing":[47,77],"domains,":[48],"where":[49],"obtaining":[50],"high-quality":[51],"labels":[52,89,169,180,208],"costly":[54],"time-consuming.":[56],"paper":[58],"introduces":[59],"Vision-Language":[60],"Sanitization":[61],"Refinement":[63],"(VLSR),":[64],"which":[65,177,224],"a":[67,91,216],"vision-language-based":[68],"framework":[69,204],"for":[70,136,230],"sanitization":[72,118],"refinement":[74],"multi-label":[76],"image":[78,138,147],"datasets.":[79],"method":[81,153,214],"embeds":[82],"both":[83,182],"images":[84],"their":[86],"associated":[87],"textual":[88],"into":[90,170],"shared":[92],"semantic":[93],"space":[94],"leveraging":[95],"CLIP":[97],"vision-language":[98],"model.":[99],"Then":[100],"two":[101],"key":[102],"tasks":[103],"are":[104],"addressed":[105],"this":[107],"process":[108],"by":[109,139,161],"computing":[110],"cosine":[112,144],"similarity":[113,145],"between":[114,146],"embeddings.":[115,150],"First,":[116],"performed":[120],"identify":[122],"irrelevant,":[123],"misspelled,":[124],"or":[125],"semantically":[126,133,167],"weak":[127],"labels,":[128],"surface":[130],"most":[132],"aligned":[134],"each":[137],"comparing":[140],"image-label":[141],"pairs":[142],"using":[143],"Second,":[151],"applies":[154],"density-based":[155],"clustering":[156],"text":[158],"embeddings,":[159],"followed":[160],"iterative":[162],"cluster":[163],"merging,":[164],"group":[166],"similar":[168],"unified":[171],"groups.":[173],"Factorynet":[175],"dataset,":[176],"includes":[178],"noisy":[179],"human":[183,241],"annotations":[184],"web-scraped":[186],"sources,":[187],"employed":[189],"evaluate":[191],"effectiveness":[193],"proposed":[196],"framework.":[197],"Experimental":[198],"results":[199],"demonstrate":[200],"that":[201],"VLSR":[203],"successfully":[205],"identifies":[206],"problematic":[207],"improves":[210],"consistency.":[212],"enables":[215],"significant":[217],"reduction":[218],"vocabulary":[221],"through":[222],"clustering,":[223],"ultimately":[225],"enhances":[226],"dataset's":[228],"training":[231],"robust":[232],"with":[239],"minimal":[240],"intervention.":[242]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
