{"id":"https://openalex.org/W7158939297","doi":"https://doi.org/10.48550/arxiv.2604.26232","title":"DepthPilot: From Controllability to Interpretability in Colonoscopy Video Generation","display_name":"DepthPilot: From Controllability to Interpretability in Colonoscopy Video Generation","publication_year":2026,"publication_date":"2026-04-29","ids":{"openalex":"https://openalex.org/W7158939297","doi":"https://doi.org/10.48550/arxiv.2604.26232"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.26232","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.26232","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.26232","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5012636001","display_name":"Junhu Fu","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Fu, Junhu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134887772","display_name":"Ke Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Ke","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134907312","display_name":"Weidong Guo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guo, Weidong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127303660","display_name":"Shuyu Liang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liang, Shuyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134891182","display_name":"Jie Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Jie","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134921300","display_name":"Chen Ma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Chen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127243309","display_name":"Kehao Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Kehao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038734312","display_name":"Shengli Lin","orcid":"https://orcid.org/0000-0002-5785-8695"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lin, Shengli","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134920845","display_name":"Zeju Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Zeju","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134875804","display_name":"Yuanyuan Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Yuanyuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134894000","display_name":"Yi \u90ed\u4e00 Guo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guo, Yi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134919965","display_name":"Shuo Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Shuo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":12,"corresponding_author_ids":["https://openalex.org/A5012636001"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.5131999850273132,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.5131999850273132,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10552","display_name":"Colorectal Cancer Screening and Detection","score":0.0868000015616417,"subfield":{"id":"https://openalex.org/subfields/2730","display_name":"Oncology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.05770000070333481,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/interpretability","display_name":"Interpretability","score":0.8690000176429749},{"id":"https://openalex.org/keywords/controllability","display_name":"Controllability","score":0.6866999864578247},{"id":"https://openalex.org/keywords/prior-probability","display_name":"Prior probability","score":0.5296000242233276},{"id":"https://openalex.org/keywords/spline","display_name":"Spline (mechanical)","score":0.46790000796318054},{"id":"https://openalex.org/keywords/bridging","display_name":"Bridging (networking)","score":0.42260000109672546},{"id":"https://openalex.org/keywords/rendering","display_name":"Rendering (computer graphics)","score":0.4007999897003174},{"id":"https://openalex.org/keywords/nonlinear-system","display_name":"Nonlinear system","score":0.37400001287460327},{"id":"https://openalex.org/keywords/landmark","display_name":"Landmark","score":0.33309999108314514}],"concepts":[{"id":"https://openalex.org/C2781067378","wikidata":"https://www.wikidata.org/wiki/Q17027399","display_name":"Interpretability","level":2,"score":0.8690000176429749},{"id":"https://openalex.org/C48209547","wikidata":"https://www.wikidata.org/wiki/Q1331104","display_name":"Controllability","level":2,"score":0.6866999864578247},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6714000105857849},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6345999836921692},{"id":"https://openalex.org/C177769412","wikidata":"https://www.wikidata.org/wiki/Q278090","display_name":"Prior probability","level":3,"score":0.5296000242233276},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5195000171661377},{"id":"https://openalex.org/C10390562","wikidata":"https://www.wikidata.org/wiki/Q581809","display_name":"Spline (mechanical)","level":2,"score":0.46790000796318054},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.42260000109672546},{"id":"https://openalex.org/C205711294","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Rendering (computer graphics)","level":2,"score":0.4007999897003174},{"id":"https://openalex.org/C158622935","wikidata":"https://www.wikidata.org/wiki/Q660848","display_name":"Nonlinear system","level":2,"score":0.37400001287460327},{"id":"https://openalex.org/C2780297707","wikidata":"https://www.wikidata.org/wiki/Q4895393","display_name":"Landmark","level":2,"score":0.33309999108314514},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.33009999990463257},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.32679998874664307},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.31839999556541443},{"id":"https://openalex.org/C153701036","wikidata":"https://www.wikidata.org/wiki/Q659974","display_name":"Trustworthiness","level":2,"score":0.3125999867916107},{"id":"https://openalex.org/C31601959","wikidata":"https://www.wikidata.org/wiki/Q931309","display_name":"Medical imaging","level":2,"score":0.29809999465942383},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.29760000109672546},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.29100000858306885},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.2874000072479248},{"id":"https://openalex.org/C739882","wikidata":"https://www.wikidata.org/wiki/Q3560506","display_name":"Anomaly detection","level":2,"score":0.2676999866962433},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.26089999079704285},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.257999986410141},{"id":"https://openalex.org/C2780440489","wikidata":"https://www.wikidata.org/wiki/Q5227278","display_name":"Data-driven","level":2,"score":0.2574000060558319}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.26232","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.26232","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.26232","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.26232","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.48413461446762085}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Controllable":[0],"medical":[1],"video":[2,45],"generation":[3,54],"has":[4],"achieved":[5],"remarkable":[6],"progress,":[7],"but":[8],"it":[9],"still":[10],"lacks":[11],"interpretability,":[12,35],"which":[13],"requires":[14],"the":[15,29,39,75,149,180],"alignment":[16,69],"of":[17],"generated":[18],"contents":[19],"with":[20,105],"physical":[21],"priors":[22],"and":[23,120,142,154,170,174],"faithful":[24],"clinical":[25,122],"manifestations.":[26],"To":[27,59,85],"push":[28],"boundaries":[30],"from":[31],"mere":[32],"controllability":[33],"to":[34,81,109,128,162],"we":[36],"propose":[37],"DepthPilot,":[38],"first":[40,144],"interpretable":[41],"framework":[42],"for":[43],"colonoscopy":[44],"generation.":[46],"This":[47],"work":[48],"takes":[49],"a":[50,66,177],"step":[51],"toward":[52,179],"trustworthy":[53],"through":[55],"two":[56],"synergistic":[57],"paradigms.":[58],"achieve":[60],"explicit":[61],"geometric":[62,92],"grounding,":[63],"DepthPilot":[64,94],"devises":[65],"prior":[67],"distribution":[68],"strategy,":[70],"injecting":[71],"depth":[72],"constraints":[73],"into":[74],"diffusion":[76],"backbone":[77],"via":[78],"parameter-efficient":[79],"fine-tuning":[80],"ensure":[82],"anatomical":[83],"fidelity.":[84],"enhance":[86],"intrinsic":[87],"nonlinear":[88],"modeling":[89],"under":[90],"these":[91],"constraints,":[93],"employs":[95],"an":[96],"adaptive":[97],"spline":[98,107],"denoising":[99],"module,":[100],"replacing":[101],"fixed":[102],"linear":[103],"weights":[104],"learnable":[106],"functions":[108],"capture":[110],"complex":[111],"spatio-temporal":[112],"dynamics.":[113],"Extensive":[114],"evaluations":[115],"across":[116,139],"three":[117],"public":[118],"datasets":[119],"in-house":[121],"data":[123],"confirm":[124],"DepthPilot's":[125],"robust":[126],"ability":[127],"produce":[129],"physically":[130],"consistent":[131],"videos.":[132],"It":[133],"achieves":[134],"FID":[135],"scores":[136],"below":[137],"15":[138],"all":[140],"benchmarks":[141],"ranks":[143],"in":[145],"clinician":[146],"assessments,":[147],"bridging":[148],"gap":[150],"between":[151],"\"visually":[152],"realistic\"":[153],"\"clinically":[155],"interpretable\".":[156],"Moreover,":[157],"DepthPilot-generated":[158],"videos":[159],"are":[160],"expected":[161],"enable":[163],"reliable":[164],"3D":[165],"reconstruction,":[166],"facilitating":[167],"surgical":[168],"navigation":[169],"blind":[171],"region":[172],"identification,":[173],"serve":[175],"as":[176],"foundation":[178],"colorectal":[181],"world":[182],"model.":[183]},"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2026-05-01T00:00:00"}
