{"id":"https://openalex.org/W7161683478","doi":"https://doi.org/10.48550/arxiv.2605.17915","title":"SurgLQA: Scalable Long-Horizon Surgical Video Question Answering","display_name":"SurgLQA: Scalable Long-Horizon Surgical Video Question Answering","publication_year":2026,"publication_date":"2026-05-18","ids":{"openalex":"https://openalex.org/W7161683478","doi":"https://doi.org/10.48550/arxiv.2605.17915"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.17915","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.17915","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.17915","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5136489509","display_name":"Diandian Guo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guo, Diandian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046094203","display_name":"Xikai Yang","orcid":"https://orcid.org/0000-0003-1762-9684"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Xikai","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136499900","display_name":"Ruiyang Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Ruiyang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082263233","display_name":"Jialun Pei","orcid":"https://orcid.org/0000-0002-2630-2838"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pei, Jialun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5136494412","display_name":"Pheng-Ann Heng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Heng, Pheng-Ann","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9923999905586243,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9923999905586243,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.002199999988079071,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.0010999999940395355,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6758000254631042},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.6535000205039978},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6209999918937683},{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.5167999863624573},{"id":"https://openalex.org/keywords/limiting","display_name":"Limiting","score":0.4740999937057495},{"id":"https://openalex.org/keywords/video-retrieval","display_name":"Video retrieval","score":0.37709999084472656},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.3188999891281128}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7753000259399414},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6758000254631042},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.6535000205039978},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6209999918937683},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.5167999863624573},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4805000126361847},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.4740999937057495},{"id":"https://openalex.org/C2983174267","wikidata":"https://www.wikidata.org/wiki/Q3775098","display_name":"Video retrieval","level":2,"score":0.37709999084472656},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.32690000534057617},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3246000111103058},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.3188999891281128},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.3005000054836273},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.2989000082015991},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.29829999804496765},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.2939999997615814},{"id":"https://openalex.org/C55166926","wikidata":"https://www.wikidata.org/wiki/Q2892946","display_name":"Oracle","level":2,"score":0.2732999920845032},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.2702000141143799},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.26919999718666077},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.25760000944137573},{"id":"https://openalex.org/C158154518","wikidata":"https://www.wikidata.org/wiki/Q7310970","display_name":"Relevance (law)","level":2,"score":0.2517000138759613}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.17915","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.17915","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"Preprint"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.17915","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.17915","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.7523285746574402,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Surgical":[0],"Video":[1],"Question":[2],"Answering":[3],"(VideoQA)":[4],"provides":[5],"a":[6,56,115],"promising":[7],"paradigm":[8,98],"for":[9,61],"dynamic":[10],"intraoperative":[11],"interpretation,":[12],"enabling":[13],"real-time":[14],"decision":[15],"support":[16],"and":[17,42,121,127],"context-aware":[18],"retrieval":[19],"in":[20,139],"clinical":[21],"environments.":[22],"Nevertheless,":[23],"existing":[24],"approaches":[25],"are":[26],"predominantly":[27],"restricted":[28],"to":[29,37,77],"images":[30],"or":[31],"short":[32],"clips,":[33],"limiting":[34],"their":[35],"ability":[36],"model":[38],"long-range":[39,80,140],"procedural":[40],"dynamics":[41],"causal":[43],"dependencies":[44],"across":[45],"extended":[46],"surgical":[47,63],"workflows.":[48],"To":[49,109],"address":[50],"this":[51],"challenge,":[52],"we":[53,88,113],"propose":[54],"SurgLQA,":[55],"unified":[57],"long-horizon":[58],"VideoQA":[59,118],"framework":[60,66],"scalable":[62],"reasoning.":[64],"This":[65],"incorporates":[67],"Faithful":[68],"Temporal":[69],"Consolidation":[70],"(FTC),":[71],"which":[72],"leverages":[73],"intrinsic":[74],"temporal":[75,85],"cues":[76],"construct":[78],"compact":[79],"representations":[81],"while":[82],"preserving":[83],"fine-grained":[84],"fidelity.":[86],"Further,":[87],"develop":[89],"Temporally-Grounded":[90],"Multi-Policy":[91],"Scaling":[92],"(TMS),":[93],"an":[94],"adaptive":[95],"test-time":[96],"inference":[97],"that":[99,132],"strategically":[100],"adjusts":[101],"policy-level":[102],"reasoning":[103,141],"capacity":[104],"within":[105],"temporally":[106,143],"grounded":[107,144],"contexts.":[108],"facilitate":[110],"systematic":[111],"evaluation,":[112],"restructured":[114],"long-duration":[116],"colonoscopy":[117],"benchmark,":[119],"Colon-LQA,":[120],"conducted":[122],"extensive":[123],"experiments":[124],"on":[125],"Colon-LQA":[126],"REAL-Colon-VQA.":[128],"Experimental":[129],"results":[130],"demonstrate":[131],"our":[133],"approach":[134],"achieves":[135],"consistent":[136],"performance":[137],"gains":[138],"with":[142],"inference.":[145],"Code":[146],"link:":[147],"https://github.com/RascalGdd/SurgLQA.":[148]},"counts_by_year":[],"updated_date":"2026-07-01T06:00:48.157686","created_date":"2026-05-20T00:00:00"}
