{"id":"https://openalex.org/W7147679194","doi":"https://doi.org/10.1109/icaiic68212.2026.11454372","title":"Enhancing Large Vision\u2013Language Models for Multimodal Defect Detection via SFT\u2013GRPO Reinforcement Learning","display_name":"Enhancing Large Vision\u2013Language Models for Multimodal Defect Detection via SFT\u2013GRPO Reinforcement Learning","publication_year":2026,"publication_date":"2026-02-24","ids":{"openalex":"https://openalex.org/W7147679194","doi":"https://doi.org/10.1109/icaiic68212.2026.11454372"},"language":null,"primary_location":{"id":"doi:10.1109/icaiic68212.2026.11454372","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icaiic68212.2026.11454372","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 International Conference on Artificial Intelligence in Information and Communication (ICAIIC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5084971994","display_name":"Hung Viet Nguyen","orcid":"https://orcid.org/0000-0002-6981-0891"},"institutions":[{"id":"https://openalex.org/I104338594","display_name":"Inje University","ror":"https://ror.org/04xqwq985","country_code":"KR","type":"education","lineage":["https://openalex.org/I104338594"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Hung Viet Nguyen","raw_affiliation_strings":["INJE University,Department of Digital Anti-Aging Healthcare,Kimhae,Rep. of Korea,50834"],"affiliations":[{"raw_affiliation_string":"INJE University,Department of Digital Anti-Aging Healthcare,Kimhae,Rep. of Korea,50834","institution_ids":["https://openalex.org/I104338594"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132690009","display_name":"Hyojin Park","orcid":null},"institutions":[{"id":"https://openalex.org/I71007489","display_name":"Kyungnam University","ror":"https://ror.org/037pkxm09","country_code":"KR","type":"education","lineage":["https://openalex.org/I71007489"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Hyojin Park","raw_affiliation_strings":["Kyungnam University,Gyeongnam Intelligence Innovation Center (GIIC),Changwon,Rep. of Korea,51767"],"affiliations":[{"raw_affiliation_string":"Kyungnam University,Gyeongnam Intelligence Innovation Center (GIIC),Changwon,Rep. of Korea,51767","institution_ids":["https://openalex.org/I71007489"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125636932","display_name":"Namhyun Yoo","orcid":null},"institutions":[{"id":"https://openalex.org/I71007489","display_name":"Kyungnam University","ror":"https://ror.org/037pkxm09","country_code":"KR","type":"education","lineage":["https://openalex.org/I71007489"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Namhyun Yoo","raw_affiliation_strings":["Kyungnam University,Department of Computer Engineering,Changwon,Rep. of Korea,51767"],"affiliations":[{"raw_affiliation_string":"Kyungnam University,Department of Computer Engineering,Changwon,Rep. of Korea,51767","institution_ids":["https://openalex.org/I71007489"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5114860745","display_name":"Jinhong Yang","orcid":"https://orcid.org/0000-0002-7756-0263"},"institutions":[{"id":"https://openalex.org/I104338594","display_name":"Inje University","ror":"https://ror.org/04xqwq985","country_code":"KR","type":"education","lineage":["https://openalex.org/I104338594"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Jinhong Yang","raw_affiliation_strings":["INJE University,Department of Medical IT,Kimhae,Rep. of Korea,50834"],"affiliations":[{"raw_affiliation_string":"INJE University,Department of Medical IT,Kimhae,Rep. of Korea,50834","institution_ids":["https://openalex.org/I104338594"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5084971994"],"corresponding_institution_ids":["https://openalex.org/I104338594"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.93674164,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1517","last_page":"1522"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11606","display_name":"Infrastructure Maintenance and Monitoring","score":0.6086000204086304,"subfield":{"id":"https://openalex.org/subfields/2205","display_name":"Civil and Structural Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11606","display_name":"Infrastructure Maintenance and Monitoring","score":0.6086000204086304,"subfield":{"id":"https://openalex.org/subfields/2205","display_name":"Civil and Structural Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.10890000313520432,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12086","display_name":"Structural Integrity and Reliability Analysis","score":0.06769999861717224,"subfield":{"id":"https://openalex.org/subfields/2210","display_name":"Mechanical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/json","display_name":"JSON","score":0.8245000243186951},{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.6556000113487244},{"id":"https://openalex.org/keywords/bounding-overwatch","display_name":"Bounding overwatch","score":0.5504999756813049},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.5045999884605408},{"id":"https://openalex.org/keywords/offset","display_name":"Offset (computer science)","score":0.48570001125335693},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.46389999985694885},{"id":"https://openalex.org/keywords/visual-inspection","display_name":"Visual inspection","score":0.46059998869895935},{"id":"https://openalex.org/keywords/minimum-bounding-box","display_name":"Minimum bounding box","score":0.45019999146461487},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4106999933719635}],"concepts":[{"id":"https://openalex.org/C2780416260","wikidata":"https://www.wikidata.org/wiki/Q2063","display_name":"JSON","level":2,"score":0.8245000243186951},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7247999906539917},{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.6556000113487244},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5896999835968018},{"id":"https://openalex.org/C63584917","wikidata":"https://www.wikidata.org/wiki/Q333286","display_name":"Bounding overwatch","level":2,"score":0.5504999756813049},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.5045999884605408},{"id":"https://openalex.org/C175291020","wikidata":"https://www.wikidata.org/wiki/Q1156822","display_name":"Offset (computer science)","level":2,"score":0.48570001125335693},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.46389999985694885},{"id":"https://openalex.org/C168820333","wikidata":"https://www.wikidata.org/wiki/Q448889","display_name":"Visual inspection","level":2,"score":0.46059998869895935},{"id":"https://openalex.org/C147037132","wikidata":"https://www.wikidata.org/wiki/Q6865426","display_name":"Minimum bounding box","level":3,"score":0.45019999146461487},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4106999933719635},{"id":"https://openalex.org/C52146309","wikidata":"https://www.wikidata.org/wiki/Q7431116","display_name":"Schema (genetic algorithms)","level":2,"score":0.4041000008583069},{"id":"https://openalex.org/C115901376","wikidata":"https://www.wikidata.org/wiki/Q184199","display_name":"Automation","level":2,"score":0.40389999747276306},{"id":"https://openalex.org/C2777655017","wikidata":"https://www.wikidata.org/wiki/Q1501161","display_name":"Toolbox","level":2,"score":0.3953999876976013},{"id":"https://openalex.org/C22367795","wikidata":"https://www.wikidata.org/wiki/Q7625208","display_name":"Structured prediction","level":2,"score":0.3779999911785126},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3732999861240387},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3522000014781952},{"id":"https://openalex.org/C169258074","wikidata":"https://www.wikidata.org/wiki/Q245748","display_name":"Random forest","level":2,"score":0.31929999589920044},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.31540000438690186},{"id":"https://openalex.org/C2780387288","wikidata":"https://www.wikidata.org/wiki/Q13693","display_name":"Windshield","level":2,"score":0.31360000371932983},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.2957000136375427},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.28290000557899475},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.27390000224113464},{"id":"https://openalex.org/C40149104","wikidata":"https://www.wikidata.org/wiki/Q5620977","display_name":"Factory (object-oriented programming)","level":2,"score":0.26489999890327454},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.2639000117778778},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.2540000081062317},{"id":"https://openalex.org/C85847156","wikidata":"https://www.wikidata.org/wiki/Q59015987","display_name":"Verifiable secret sharing","level":3,"score":0.2535000145435333},{"id":"https://openalex.org/C5339829","wikidata":"https://www.wikidata.org/wiki/Q1425977","display_name":"Machine vision","level":2,"score":0.25130000710487366},{"id":"https://openalex.org/C33762810","wikidata":"https://www.wikidata.org/wiki/Q461671","display_name":"Data integrity","level":2,"score":0.25029999017715454}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icaiic68212.2026.11454372","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icaiic68212.2026.11454372","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 International Conference on Artificial Intelligence in Information and Communication (ICAIIC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.41598644852638245,"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W2963037989","https://openalex.org/W4226256344","https://openalex.org/W4320477354","https://openalex.org/W4322619914","https://openalex.org/W4386065385","https://openalex.org/W4393158476","https://openalex.org/W4393276712","https://openalex.org/W4400410093","https://openalex.org/W4402381603","https://openalex.org/W4402716302","https://openalex.org/W4402961681","https://openalex.org/W4404997009","https://openalex.org/W4406012864","https://openalex.org/W4406613026","https://openalex.org/W4409368445","https://openalex.org/W4409671560","https://openalex.org/W4412567591","https://openalex.org/W4413144266","https://openalex.org/W4415598949","https://openalex.org/W7087818131","https://openalex.org/W7131386314","https://openalex.org/W7143536265"],"related_works":[],"abstract_inverted_index":{"Large":[0],"Vision":[1],"Language":[2],"Models":[3],"(LVLMs)":[4],"offer":[5],"strong":[6],"visual":[7],"reasoning":[8],"capabilities":[9],"but":[10],"their":[11],"direct":[12],"application":[13],"to":[14,21,51,81,138,157,196],"industrial":[15],"defect":[16,25,57,84,150],"inspection":[17,58,73,94,203],"remains":[18],"limited":[19],"due":[20],"domain":[22],"complexity,":[23],"diverse":[24],"modes,":[26],"and":[27,88,112,143,172,189,200],"the":[28,75],"need":[29],"for":[30,55,149,204],"structured":[31,92],"reporting.":[32],"This":[33],"paper":[34],"presents":[35],"a":[36,67],"unified":[37],"fine-tuning":[38],"framework":[39],"that":[40,103,187],"combines":[41],"Supervised":[42],"Fine-Tuning":[43],"(SFT)":[44],"with":[45,125],"Group":[46],"Relative":[47],"Policy":[48],"Optimization":[49],"(GRPO)":[50],"adapt":[52],"open-source":[53],"LVLMs":[54,195],"multimodal":[56],"in":[59,180],"Liquefied":[60],"Natural":[61],"Gas":[62],"(LNG)":[63],"tank":[64],"manufacturing.":[65],"Using":[66],"balanced":[68],"dataset":[69],"derived":[70],"from":[71,133,153],"188,631":[72],"images,":[74],"proposed":[76],"method":[77],"enables":[78],"each":[79],"LVLM":[80,119],"perform":[82],"joint":[83],"localization,":[85],"attribute":[86],"prediction,":[87],"automatic":[89],"generation":[90],"of":[91],"JSON":[93,105],"reports.":[95],"The":[96,161],"GRPO":[97,190],"stage":[98],"incorporates":[99],"verifiable":[100],"reward":[101],"signals":[102],"enforce":[104],"validity,":[106],"schema":[107],"compliance,":[108],"bounding":[109],"box":[110],"accuracy,":[111],"metadata":[113,182],"consistency.":[114],"Experimental":[115],"results":[116],"on":[117],"four":[118],"architectures":[120],"demonstrate":[121],"substantial":[122],"performance":[123],"gains,":[124],"mean":[126],"Average":[127],"Precision":[128],"at":[129,169],"IoU":[130,170],"0.5":[131,171],"improving":[132],"<tex":[134,139,144,176],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[135,140,145,177],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$35-39$</tex>":[136],"percent":[137,142,156,167,175],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$84-89$</tex>":[141],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$F":[146,178],"1$</tex>":[147,179],"scores":[148],"labels":[151],"increasing":[152],"approximately":[154],"14":[155],"above":[158],"84":[159],"percent.":[160],"best-performing":[162],"model,":[163],"Qwen2.5-VL-7B,":[164],"achieves":[165],"88.77":[166],"mAP":[168],"over":[173],"90":[174],"key":[181],"fields.":[183],"These":[184],"findings":[185],"indicate":[186],"SFT":[188],"provide":[191],"complementary":[192],"benefits,":[193],"enabling":[194],"deliver":[197],"accurate,":[198],"interpretable,":[199],"computationally":[201],"efficient":[202],"next-generation":[205],"manufacturing":[206],"environments.":[207]},"counts_by_year":[],"updated_date":"2026-04-02T13:53:19.096889","created_date":"2026-04-02T00:00:00"}
