{"id":"https://openalex.org/W7134894685","doi":"https://doi.org/10.48550/arxiv.2603.09245","title":"Towards Instance Segmentation with Polygon Detection Transformers","display_name":"Towards Instance Segmentation with Polygon Detection Transformers","publication_year":2026,"publication_date":"2026-03-10","ids":{"openalex":"https://openalex.org/W7134894685","doi":"https://doi.org/10.48550/arxiv.2603.09245"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.09245","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.09245","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.09245","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5128796575","display_name":"Jiacheng Sun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Jiacheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128775064","display_name":"Jiaqi Lin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lin, Jiaqi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5121767669","display_name":"Wenlong Hu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hu, Wenlong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128743077","display_name":"Haoyang Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Haoyang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128761753","display_name":"Xinghong Zhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Xinghong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123725018","display_name":"Chenghai Mao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mao, Chenghai","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128740202","display_name":"Yan Peng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Peng, Yan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5044546450","display_name":"Xiaomao Li","orcid":"https://orcid.org/0000-0002-4106-4858"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Xiaomao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.5562000274658203,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.5562000274658203,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12859","display_name":"Cell Image Analysis Techniques","score":0.13519999384880066,"subfield":{"id":"https://openalex.org/subfields/1304","display_name":"Biophysics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10052","display_name":"Medical Image Segmentation Techniques","score":0.13300000131130219,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.7378000020980835},{"id":"https://openalex.org/keywords/polygon","display_name":"Polygon (computer graphics)","score":0.5652999877929688},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.4771000146865845},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.4250999987125397},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.42250001430511475},{"id":"https://openalex.org/keywords/image-segmentation","display_name":"Image segmentation","score":0.39250001311302185},{"id":"https://openalex.org/keywords/polar","display_name":"Polar","score":0.36489999294281006},{"id":"https://openalex.org/keywords/polar-coordinate-system","display_name":"Polar coordinate system","score":0.3249000012874603}],"concepts":[{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.7378000020980835},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6392999887466431},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6105999946594238},{"id":"https://openalex.org/C190694206","wikidata":"https://www.wikidata.org/wiki/Q3276654","display_name":"Polygon (computer graphics)","level":3,"score":0.5652999877929688},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.4771000146865845},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4560000002384186},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.4250999987125397},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.42250001430511475},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.39250001311302185},{"id":"https://openalex.org/C29705727","wikidata":"https://www.wikidata.org/wiki/Q294562","display_name":"Polar","level":2,"score":0.36489999294281006},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3334999978542328},{"id":"https://openalex.org/C40069579","wikidata":"https://www.wikidata.org/wiki/Q62494","display_name":"Polar coordinate system","level":2,"score":0.3249000012874603},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.3043000102043152},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.28279998898506165},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.28060001134872437},{"id":"https://openalex.org/C83546350","wikidata":"https://www.wikidata.org/wiki/Q1139051","display_name":"Regression","level":2,"score":0.27140000462532043},{"id":"https://openalex.org/C80899671","wikidata":"https://www.wikidata.org/wiki/Q1304193","display_name":"Vertex (graph theory)","level":3,"score":0.26260000467300415},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.2590999901294708},{"id":"https://openalex.org/C62354387","wikidata":"https://www.wikidata.org/wiki/Q875399","display_name":"Boundary (topology)","level":2,"score":0.25290000438690186},{"id":"https://openalex.org/C65885262","wikidata":"https://www.wikidata.org/wiki/Q7429708","display_name":"Scale-space segmentation","level":4,"score":0.25290000438690186}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.09245","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.09245","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.09245","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.09245","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.7300704717636108,"id":"https://metadata.un.org/sdg/11","display_name":"Sustainable cities and communities"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"One":[0],"of":[1,13],"the":[2,10,44,52],"bottlenecks":[3],"for":[4],"instance":[5,33],"segmentation":[6,34],"today":[7],"lies":[8],"in":[9,56,118,155],"conflicting":[11],"requirements":[12],"high-resolution":[14,119],"inputs":[15],"and":[16,64,72,107,135],"lightweight,":[17],"real-time":[18],"inference.":[19],"To":[20],"address":[21],"this":[22],"bottleneck,":[23],"we":[24,59,94],"present":[25],"a":[26,85,96,102],"Polygon":[27],"Detection":[28,57],"Transformer":[29],"(Poly-DETR)":[30],"to":[31,68,100],"reformulate":[32],"as":[35],"sparse":[36],"vertex":[37],"regression":[38],"via":[39],"Polar":[40,61],"Representation,":[41],"thereby":[42],"eliminating":[43],"reliance":[45],"on":[46,75,89,127,131,145,152],"dense":[47],"pixel-wise":[48],"mask":[49,108],"prediction.":[50],"Considering":[51],"box-to-polygon":[53],"reference":[54],"shift":[55],"Transformers,":[58],"propose":[60],"Deformable":[62],"Attention":[63],"Position-Aware":[65],"Training":[66],"Scheme":[67],"dynamically":[69],"update":[70],"supervision":[71],"focus":[73],"attention":[74],"boundary":[76],"cues.":[77],"Compared":[78],"with":[79],"state-of-the-art":[80],"polar-based":[81],"methods,":[82],"Poly-DETR":[83,114,140],"achieves":[84],"4.7":[86],"mAP":[87],"improvement":[88],"MS":[90],"COCO":[91],"test-dev.":[92],"Moreover,":[93],"construct":[95],"parallel":[97],"mask-based":[98,143],"counterpart":[99,144],"support":[101],"systematic":[103],"comparison":[104],"between":[105],"polar":[106],"representations.":[109],"Experimental":[110],"results":[111],"show":[112],"that":[113],"is":[115],"more":[116],"lightweight":[117],"scenarios,":[120],"reducing":[121],"memory":[122],"consumption":[123],"by":[124],"almost":[125],"half":[126],"Cityscapes":[128],"dataset.":[129],"Notably,":[130],"PanNuke":[132],"(cell":[133],"segmentation)":[134],"SpaceNet":[136],"(building":[137],"footprints)":[138],"datasets,":[139],"surpasses":[141],"its":[142,150],"all":[146],"metrics,":[147],"which":[148],"validates":[149],"advantage":[151],"regular-shaped":[153],"instances":[154],"domain-specific":[156],"settings.":[157]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-12T00:00:00"}
