{"id":"https://openalex.org/W7131105854","doi":"https://doi.org/10.1109/iccvw69036.2025.00139","title":"SurGen-Net: A Generative Approach for Surgical VQA with Structured Text Generation","display_name":"SurGen-Net: A Generative Approach for Surgical VQA with Structured Text Generation","publication_year":2025,"publication_date":"2025-10-19","ids":{"openalex":"https://openalex.org/W7131105854","doi":"https://doi.org/10.1109/iccvw69036.2025.00139"},"language":null,"primary_location":{"id":"doi:10.1109/iccvw69036.2025.00139","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccvw69036.2025.00139","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision Workshops (ICCVW)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5109263710","display_name":"Yongjun Jeon","orcid":null},"institutions":[{"id":"https://openalex.org/I848706","display_name":"Sungkyunkwan University","ror":"https://ror.org/04q78tk20","country_code":"KR","type":"education","lineage":["https://openalex.org/I848706"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Yongjun Jeon","raw_affiliation_strings":["Sungkyunkwan University"],"affiliations":[{"raw_affiliation_string":"Sungkyunkwan University","institution_ids":["https://openalex.org/I848706"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126195169","display_name":"Seonmin Park","orcid":null},"institutions":[{"id":"https://openalex.org/I2802194831","display_name":"Samsung Medical Center","ror":"https://ror.org/05a15z872","country_code":"KR","type":"healthcare","lineage":["https://openalex.org/I2250650973","https://openalex.org/I2802194831"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Seonmin Park","raw_affiliation_strings":["Samsung Medical Center"],"affiliations":[{"raw_affiliation_string":"Samsung Medical Center","institution_ids":["https://openalex.org/I2802194831"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126244059","display_name":"J. Shin","orcid":null},"institutions":[{"id":"https://openalex.org/I2802194831","display_name":"Samsung Medical Center","ror":"https://ror.org/05a15z872","country_code":"KR","type":"healthcare","lineage":["https://openalex.org/I2250650973","https://openalex.org/I2802194831"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Jongmin Shin","raw_affiliation_strings":["Samsung Medical Center"],"affiliations":[{"raw_affiliation_string":"Samsung Medical Center","institution_ids":["https://openalex.org/I2802194831"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086313203","display_name":"Kanggil Park","orcid":null},"institutions":[{"id":"https://openalex.org/I2802194831","display_name":"Samsung Medical Center","ror":"https://ror.org/05a15z872","country_code":"KR","type":"healthcare","lineage":["https://openalex.org/I2250650973","https://openalex.org/I2802194831"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Kanggil Park","raw_affiliation_strings":["Samsung Medical Center"],"affiliations":[{"raw_affiliation_string":"Samsung Medical Center","institution_ids":["https://openalex.org/I2802194831"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100642851","display_name":"Byung\u2010Soo Kim","orcid":"https://orcid.org/0000-0001-5210-7430"},"institutions":[{"id":"https://openalex.org/I2802194831","display_name":"Samsung Medical Center","ror":"https://ror.org/05a15z872","country_code":"KR","type":"healthcare","lineage":["https://openalex.org/I2250650973","https://openalex.org/I2802194831"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Bogeun Kim","raw_affiliation_strings":["Samsung Medical Center"],"affiliations":[{"raw_affiliation_string":"Samsung Medical Center","institution_ids":["https://openalex.org/I2802194831"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043814482","display_name":"Namkee Oh","orcid":"https://orcid.org/0000-0002-6594-8973"},"institutions":[{"id":"https://openalex.org/I2802194831","display_name":"Samsung Medical Center","ror":"https://ror.org/05a15z872","country_code":"KR","type":"healthcare","lineage":["https://openalex.org/I2250650973","https://openalex.org/I2802194831"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Namkee Oh","raw_affiliation_strings":["Samsung Medical Center"],"affiliations":[{"raw_affiliation_string":"Samsung Medical Center","institution_ids":["https://openalex.org/I2802194831"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5087849800","display_name":"Kyu-Hwan Jung","orcid":"https://orcid.org/0000-0002-6626-6800"},"institutions":[{"id":"https://openalex.org/I848706","display_name":"Sungkyunkwan University","ror":"https://ror.org/04q78tk20","country_code":"KR","type":"education","lineage":["https://openalex.org/I848706"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Kyu-Hwan Jung","raw_affiliation_strings":["Sungkyunkwan University"],"affiliations":[{"raw_affiliation_string":"Sungkyunkwan University","institution_ids":["https://openalex.org/I848706"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5109263710"],"corresponding_institution_ids":["https://openalex.org/I848706"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.74888268,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1303","last_page":"1310"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.3382999897003174,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.3382999897003174,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.30570000410079956,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.0697999969124794,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.7592999935150146},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.6966999769210815},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.5899999737739563},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5482000112533569},{"id":"https://openalex.org/keywords/raw-data","display_name":"Raw data","score":0.49880000948905945},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.4507000148296356},{"id":"https://openalex.org/keywords/structured-prediction","display_name":"Structured prediction","score":0.43230000138282776},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.4174000024795532}],"concepts":[{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.7592999935150146},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.703499972820282},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.6966999769210815},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.5899999737739563},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5730999708175659},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5482000112533569},{"id":"https://openalex.org/C132964779","wikidata":"https://www.wikidata.org/wiki/Q2110223","display_name":"Raw data","level":2,"score":0.49880000948905945},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4641999900341034},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.4507000148296356},{"id":"https://openalex.org/C22367795","wikidata":"https://www.wikidata.org/wiki/Q7625208","display_name":"Structured prediction","level":2,"score":0.43230000138282776},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.4174000024795532},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.3928999900817871},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.34610000252723694},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.33719998598098755},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.32100000977516174},{"id":"https://openalex.org/C78780964","wikidata":"https://www.wikidata.org/wiki/Q7233193","display_name":"Position paper","level":2,"score":0.31369999051094055},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.304500013589859},{"id":"https://openalex.org/C198082294","wikidata":"https://www.wikidata.org/wiki/Q3399648","display_name":"Position (finance)","level":2,"score":0.30300000309944153},{"id":"https://openalex.org/C2779370443","wikidata":"https://www.wikidata.org/wiki/Q1776627","display_name":"Surgical planning","level":2,"score":0.2874999940395355},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.28110000491142273},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.2689000070095062},{"id":"https://openalex.org/C2780910867","wikidata":"https://www.wikidata.org/wiki/Q1952416","display_name":"Multimodality","level":2,"score":0.26089999079704285},{"id":"https://openalex.org/C3019611579","wikidata":"https://www.wikidata.org/wiki/Q6641956","display_name":"Surgical procedures","level":2,"score":0.25589999556541443}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iccvw69036.2025.00139","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccvw69036.2025.00139","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision Workshops (ICCVW)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.41339924931526184}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":15,"referenced_works":["https://openalex.org/W1933349210","https://openalex.org/W3159131359","https://openalex.org/W4283368635","https://openalex.org/W4379879090","https://openalex.org/W4383109126","https://openalex.org/W4387211845","https://openalex.org/W4387211892","https://openalex.org/W4387225896","https://openalex.org/W4398395817","https://openalex.org/W4401044130","https://openalex.org/W4402784356","https://openalex.org/W4403063149","https://openalex.org/W4403791233","https://openalex.org/W4404788931","https://openalex.org/W4406016316"],"related_works":[],"abstract_inverted_index":{"Existing":[0],"discriminative":[1],"approaches":[2],"in":[3,64,141],"surgical":[4,65,98,126,151],"Visual":[5],"Question":[6],"Answering":[7],"(VQA)":[8],"exhibit":[9],"key":[10],"limitations,":[11],"including":[12],"susceptibility":[13],"to":[14,57,79,86,120,149],"data":[15],"distribution":[16],"bias,":[17],"overfitting,":[18],"ineffective":[19],"utilization":[20],"of":[21,28,97,125],"raw":[22,111],"text":[23,112],"supervision,":[24],"and":[25,61,91,106,114,143,154,160],"a":[26,53,81,93,102,107],"lack":[27],"deep":[29],"cross-modal":[30],"understanding.":[31],"Additionally,":[32],"their":[33],"reliance":[34],"on":[35,129],"fixed":[36],"answer":[37],"sets":[38],"makes":[39],"them":[40],"impractical":[41],"for":[42],"real-world":[43],"clinical":[44],"applications.":[45],"To":[46],"address":[47],"these":[48],"challenges,":[49],"we":[50],"propose":[51],"SurGen-Net,":[52],"generative":[54],"model":[55,76],"designed":[56],"enhance":[58,150],"multimodal":[59,117],"learning":[60],"contextual":[62],"reasoning":[63],"VQA.":[66],"Unlike":[67],"conventional":[68],"models":[69],"that":[70],"treat":[71],"question-answer":[72,89],"pairs":[73],"independently,":[74],"our":[75],"is":[77],"trained":[78],"generate":[80],"structured":[82,163],"format,":[83],"allowing":[84],"it":[85],"integrate":[87],"all":[88],"interactions":[90],"develop":[92],"more":[94],"comprehensive":[95],"understanding":[96],"scenes.":[99],"SurGen-Net":[100],"comprises":[101],"Surgical":[103,108],"Vision":[104],"Encoder":[105],"Captioner,":[109],"utilizing":[110],"supervision":[113],"an":[115],"advanced":[116],"fusion":[118],"mechanism":[119],"construct":[121],"rich":[122],"textual":[123],"representations":[124],"environments.":[127],"Evaluation":[128],"the":[130,161],"PitVQA":[131],"dataset":[132,164],"demonstrates":[133],"consistent":[134],"performance":[135],"gains":[136],"over":[137],"existing":[138],"models,":[139],"particularly":[140],"Instruments":[142],"Position":[144],"categories,":[145],"highlighting":[146],"its":[147],"ability":[148],"tool":[152],"recognition":[153],"spatial":[155],"reasoning.":[156],"The":[157],"implementation":[158],"code":[159],"newly":[162],"format":[165],"are":[166],"available":[167],"at":[168],"https://github.com/yongyong98/Surgen-Net.git.":[169]},"counts_by_year":[],"updated_date":"2026-02-25T06:17:34.324206","created_date":"2026-02-24T00:00:00"}
