{"id":"https://openalex.org/W7138275284","doi":"https://doi.org/10.1609/aaai.v40i17.38471","title":"MISF: MLLM Guided Iterative Sample Filtering for Data Fault Detection","display_name":"MISF: MLLM Guided Iterative Sample Filtering for Data Fault Detection","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7138275284","doi":"https://doi.org/10.1609/aaai.v40i17.38471"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i17.38471","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i17.38471","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/38471/42433","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/38471/42433","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129747039","display_name":"Guoying Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Guoying Chen","raw_affiliation_strings":["Beijing Institute of Computer Technology and Application"],"affiliations":[{"raw_affiliation_string":"Beijing Institute of Computer Technology and Application","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061717616","display_name":"Ruizhuo Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ruizhuo Zhao","raw_affiliation_strings":["Beijing Institute of Computer Technology and Application"],"affiliations":[{"raw_affiliation_string":"Beijing Institute of Computer Technology and Application","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129647540","display_name":"Zhewei Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhewei Xu","raw_affiliation_strings":["Beijing Institute of Computer Technology and Application"],"affiliations":[{"raw_affiliation_string":"Beijing Institute of Computer Technology and Application","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129690385","display_name":"Bo Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bo Yang","raw_affiliation_strings":["Beijing Institute of Computer Technology and Application"],"affiliations":[{"raw_affiliation_string":"Beijing Institute of Computer Technology and Application","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5032900115","display_name":"KunLong Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kunlong Wang","raw_affiliation_strings":["Beijing Institute of Computer Technology and Application"],"affiliations":[{"raw_affiliation_string":"Beijing Institute of Computer Technology and Application","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5129747039"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.4866167,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"17","first_page":"14538","last_page":"14546"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.5038999915122986,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.5038999915122986,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.1404999941587448,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.05480000004172325,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/initialization","display_name":"Initialization","score":0.6195999979972839},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.5425000190734863},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5357999801635742},{"id":"https://openalex.org/keywords/fault-detection-and-isolation","display_name":"Fault detection and isolation","score":0.474700003862381},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.4675000011920929},{"id":"https://openalex.org/keywords/iterative-method","display_name":"Iterative method","score":0.4090999960899353},{"id":"https://openalex.org/keywords/domain-adaptation","display_name":"Domain adaptation","score":0.40549999475479126},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.3799000084400177},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.3774000108242035}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8252999782562256},{"id":"https://openalex.org/C114466953","wikidata":"https://www.wikidata.org/wiki/Q6034165","display_name":"Initialization","level":2,"score":0.6195999979972839},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.5425000190734863},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5371999740600586},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5357999801635742},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5246999859809875},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5099999904632568},{"id":"https://openalex.org/C152745839","wikidata":"https://www.wikidata.org/wiki/Q5438153","display_name":"Fault detection and isolation","level":3,"score":0.474700003862381},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.4675000011920929},{"id":"https://openalex.org/C159694833","wikidata":"https://www.wikidata.org/wiki/Q2321565","display_name":"Iterative method","level":2,"score":0.4090999960899353},{"id":"https://openalex.org/C2776434776","wikidata":"https://www.wikidata.org/wiki/Q19246213","display_name":"Domain adaptation","level":3,"score":0.40549999475479126},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.3799000084400177},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.3774000108242035},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.37599998712539673},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.35899999737739563},{"id":"https://openalex.org/C24756922","wikidata":"https://www.wikidata.org/wiki/Q1757694","display_name":"Data quality","level":3,"score":0.33899998664855957},{"id":"https://openalex.org/C117619785","wikidata":"https://www.wikidata.org/wiki/Q6094414","display_name":"Iterative learning control","level":3,"score":0.3377000093460083},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.33629998564720154},{"id":"https://openalex.org/C143587482","wikidata":"https://www.wikidata.org/wiki/Q1543216","display_name":"Iterative and incremental development","level":2,"score":0.32179999351501465},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.31139999628067017},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.2937000095844269},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.28999999165534973},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.28859999775886536},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.2827000021934509},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.28130000829696655},{"id":"https://openalex.org/C207685749","wikidata":"https://www.wikidata.org/wiki/Q2088941","display_name":"Domain knowledge","level":2,"score":0.2614000141620636},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.25529998540878296}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i17.38471","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i17.38471","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/38471/42433","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i17.38471","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i17.38471","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/38471/42433","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7138275284.pdf","grobid_xml":"https://content.openalex.org/works/W7138275284.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"High":[0],"quality":[1,156],"datasets":[2,136],"are":[3],"critical":[4],"for":[5,34,153],"training":[6],"reliable":[7],"machine":[8],"learning":[9,60],"models,":[10],"yet":[11],"data":[12,93,142],"faults":[13],"caused":[14],"by":[15,115],"insufficient":[16],"annotation":[17],"expertise":[18],"or":[19],"malicious":[20],"poisoning":[21],"attacks":[22],"remain":[23],"prevalent.":[24],"Traditional":[25],"classifier":[26],"based":[27,52,89],"methods":[28,53],"rely":[29],"on":[30,131],"manually":[31],"curated":[32,108],"subsets":[33],"fault":[35],"detection,":[36],"but":[37],"their":[38,58],"limited":[39],"scale":[40],"frequently":[41],"leads":[42],"to":[43],"model":[44,100],"overfitting.":[45],"While":[46],"multimodal":[47],"large":[48],"language":[49],"models":[50],"(MLLMs)":[51],"offer":[54],"promising":[55],"detection":[56,99,127],"capabilities,":[57],"few-shot":[59],"limitations":[61],"hinder":[62],"generalization":[63],"in":[64,157],"domain":[65,124],"specific":[66],"tasks.":[67],"To":[68],"address":[69],"these":[70],"challenges,":[71],"we":[72],"propose":[73],"MLLM":[74,88,102],"Guided":[75],"Iterative":[76],"Sample":[77],"Filtering":[78],"(MISF),":[79],"a":[80,107,149],"novel":[81],"framework":[82,96],"that":[83,138],"combines":[84],"the":[85,98],"strengths":[86],"of":[87],"initialization":[90],"and":[91,106,126,133],"iterative":[92],"refinement.":[94],"Our":[95],"initializes":[97],"with":[101],"generated":[103],"synthetic":[104],"images":[105],"clean":[109,120],"subset,":[110],"then":[111],"iteratively":[112],"refines":[113],"it":[114],"progressively":[116],"selecting":[117],"high":[118],"certainty":[119],"samples,":[121],"improving":[122,154],"both":[123],"adaptation":[125],"accuracy.":[128],"Extensive":[129],"experiments":[130],"RESISC45":[132],"Oxford-IIIT":[134],"Pets":[135],"demonstrate":[137],"MISF":[139,147],"effectively":[140],"identifies":[141],"faults,":[143],"outperforming":[144],"existing":[145],"approaches.":[146],"provides":[148],"robust,":[150],"scalable":[151],"solution":[152],"dataset":[155],"specialized":[158],"domains.":[159]},"counts_by_year":[],"updated_date":"2026-03-20T20:47:17.329874","created_date":"2026-03-18T00:00:00"}
