{"id":"https://openalex.org/W4400275574","doi":"https://doi.org/10.1109/tcsvt.2024.3422879","title":"CTOD: Cross-Attentive Task-Alignment for One-Stage Object Detection","display_name":"CTOD: Cross-Attentive Task-Alignment for One-Stage Object Detection","publication_year":2024,"publication_date":"2024-07-03","ids":{"openalex":"https://openalex.org/W4400275574","doi":"https://doi.org/10.1109/tcsvt.2024.3422879"},"language":"en","primary_location":{"id":"doi:10.1109/tcsvt.2024.3422879","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2024.3422879","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5053695187","display_name":"Ruilin Yao","orcid":"https://orcid.org/0009-0002-6654-2294"},"institutions":[{"id":"https://openalex.org/I196699116","display_name":"Wuhan University of Technology","ror":"https://ror.org/03fe7t173","country_code":"CN","type":"education","lineage":["https://openalex.org/I196699116"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Ruilin Yao","raw_affiliation_strings":["School of Computer Science and Artificial Intelligence, Wuhan University of Technology, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Artificial Intelligence, Wuhan University of Technology, Wuhan, China","institution_ids":["https://openalex.org/I196699116"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101967703","display_name":"Yi Rong","orcid":"https://orcid.org/0000-0003-4867-6811"},"institutions":[{"id":"https://openalex.org/I196699116","display_name":"Wuhan University of Technology","ror":"https://ror.org/03fe7t173","country_code":"CN","type":"education","lineage":["https://openalex.org/I196699116"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yi Rong","raw_affiliation_strings":["School of Computer Science and Artificial Intelligence, Wuhan University of Technology, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Artificial Intelligence, Wuhan University of Technology, Wuhan, China","institution_ids":["https://openalex.org/I196699116"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011608667","display_name":"Qiangqiang Huang","orcid":"https://orcid.org/0000-0001-9079-0824"},"institutions":[{"id":"https://openalex.org/I196699116","display_name":"Wuhan University of Technology","ror":"https://ror.org/03fe7t173","country_code":"CN","type":"education","lineage":["https://openalex.org/I196699116"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qiangqiang Huang","raw_affiliation_strings":["School of Computer Science and Artificial Intelligence, Wuhan University of Technology, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Artificial Intelligence, Wuhan University of Technology, Wuhan, China","institution_ids":["https://openalex.org/I196699116"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5011707621","display_name":"Shengwu Xiong","orcid":"https://orcid.org/0000-0002-4006-7029"},"institutions":[{"id":"https://openalex.org/I196699116","display_name":"Wuhan University of Technology","ror":"https://ror.org/03fe7t173","country_code":"CN","type":"education","lineage":["https://openalex.org/I196699116"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shengwu Xiong","raw_affiliation_strings":["School of Computer Science and Artificial Intelligence, Wuhan University of Technology, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Artificial Intelligence, Wuhan University of Technology, Wuhan, China","institution_ids":["https://openalex.org/I196699116"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5053695187"],"corresponding_institution_ids":["https://openalex.org/I196699116"],"apc_list":null,"apc_paid":null,"fwci":2.8948,"has_fulltext":false,"cited_by_count":11,"citation_normalized_percentile":{"value":0.92099101,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"34","issue":"11","first_page":"11507","last_page":"11520"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9822999835014343,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9822999835014343,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9598000049591064,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9372000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6178069710731506},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.6124835014343262},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.612389326095581},{"id":"https://openalex.org/keywords/object-detection","display_name":"Object detection","score":0.5339734554290771},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.5215677618980408},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5079160332679749},{"id":"https://openalex.org/keywords/stage","display_name":"Stage (stratigraphy)","score":0.4285276532173157},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3451001048088074},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.16332653164863586},{"id":"https://openalex.org/keywords/geology","display_name":"Geology","score":0.14389991760253906}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6178069710731506},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.6124835014343262},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.612389326095581},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.5339734554290771},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.5215677618980408},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5079160332679749},{"id":"https://openalex.org/C146357865","wikidata":"https://www.wikidata.org/wiki/Q1123245","display_name":"Stage (stratigraphy)","level":2,"score":0.4285276532173157},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3451001048088074},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.16332653164863586},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.14389991760253906},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcsvt.2024.3422879","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2024.3422879","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.4300000071525574,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"}],"awards":[{"id":"https://openalex.org/G4653825012","display_name":null,"funder_award_id":"62176194","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":71,"referenced_works":["https://openalex.org/W1861492603","https://openalex.org/W2108598243","https://openalex.org/W2161969291","https://openalex.org/W2168356304","https://openalex.org/W2194775991","https://openalex.org/W2549139847","https://openalex.org/W2565639579","https://openalex.org/W2570343428","https://openalex.org/W2601564443","https://openalex.org/W2886904239","https://openalex.org/W2896457183","https://openalex.org/W2928165649","https://openalex.org/W2934198733","https://openalex.org/W2962766617","https://openalex.org/W2963037989","https://openalex.org/W2963091558","https://openalex.org/W2963163009","https://openalex.org/W2963351448","https://openalex.org/W2964080601","https://openalex.org/W2966926453","https://openalex.org/W2982770724","https://openalex.org/W2985405845","https://openalex.org/W2986357608","https://openalex.org/W3015352266","https://openalex.org/W3035316790","https://openalex.org/W3035396860","https://openalex.org/W3096609285","https://openalex.org/W3108849448","https://openalex.org/W3138516171","https://openalex.org/W3159885298","https://openalex.org/W3160694286","https://openalex.org/W3167308647","https://openalex.org/W3171660447","https://openalex.org/W3172087149","https://openalex.org/W3176187859","https://openalex.org/W3196024568","https://openalex.org/W4211041154","https://openalex.org/W4213019189","https://openalex.org/W4214489586","https://openalex.org/W4214507171","https://openalex.org/W4220726952","https://openalex.org/W4221143432","https://openalex.org/W4225745741","https://openalex.org/W4226160848","https://openalex.org/W4288325606","https://openalex.org/W4312313652","https://openalex.org/W4312348243","https://openalex.org/W4312789460","https://openalex.org/W4313119505","https://openalex.org/W4313525589","https://openalex.org/W4313898748","https://openalex.org/W4319068750","https://openalex.org/W4319996489","https://openalex.org/W4320002812","https://openalex.org/W4320009770","https://openalex.org/W4320736485","https://openalex.org/W4327653207","https://openalex.org/W4361996346","https://openalex.org/W4383113375","https://openalex.org/W4383220186","https://openalex.org/W4385245566","https://openalex.org/W4385893752","https://openalex.org/W4386066009","https://openalex.org/W4386083115","https://openalex.org/W4389961109","https://openalex.org/W6737664043","https://openalex.org/W6766978945","https://openalex.org/W6779586474","https://openalex.org/W6779709467","https://openalex.org/W6784094891","https://openalex.org/W6784923365"],"related_works":["https://openalex.org/W2329386257","https://openalex.org/W2503350049","https://openalex.org/W2397616145","https://openalex.org/W2397320258","https://openalex.org/W4324058133","https://openalex.org/W2732813147","https://openalex.org/W2143460112","https://openalex.org/W2042906257","https://openalex.org/W4292830139","https://openalex.org/W4319309705"],"abstract_inverted_index":{"Existing":[0],"one-stage":[1,231],"object":[2,19,266],"detectors":[3,232],"are":[4,33,193],"commonly":[5],"implemented":[6],"in":[7,58,259],"a":[8,82,107,125,136,216],"multi-task":[9],"learning":[10,59],"based":[11,130],"manner,":[12],"which":[13,113],"simultaneously":[14],"solves":[15],"two":[16,30,75,181],"different":[17],"sub-tasks:":[18],"classification":[20,62],"and":[21,63,145,224,236,242,255],"localization.":[22],"To":[23],"achieve":[24],"this,":[25],"the":[26,48,53,56,71,97,159,170,176,180,185,196,210,228,253,261],"detection":[27,217],"heads":[28],"with":[29,222],"independent":[31],"branches":[32],"typically":[34],"utilized":[35],"to":[36,47,67,90,147,173],"extract":[37],"specific":[38],"image":[39],"features":[40,122,188],"for":[41,100,117,265],"each":[42,118,155],"task":[43,126,262],"separately.":[44],"However,":[45],"due":[46],"lack":[49],"of":[50,61,73,158,179,219,248,257],"interaction":[51],"between":[52,70],"parallel":[54],"branches,":[55],"difference":[57],"objectives":[60],"localization":[64],"will":[65],"lead":[66],"spatial":[68,156],"misalignment":[69,263],"predictions":[72],"these":[74,132],"tasks.":[76,102],"In":[77],"this":[78,92],"work,":[79],"we":[80,104,134,164],"propose":[81,135],"novel":[83],"Cross-attentive":[84],"Task-aligned":[85],"Object":[86],"Detection":[87],"(CTOD)":[88],"method":[89,214],"handle":[91],"problem":[93,264],"by":[94,123,238],"explicitly":[95],"promoting":[96],"prediction":[98,177],"consistency":[99],"both":[101],"Specifically,":[103],"first":[105],"design":[106],"Dual":[108],"Task":[109],"Interaction":[110],"(DTI)":[111],"module,":[112],"generates":[114],"task-interactive":[115,171],"embeddings":[116,172],"branch":[119],"from":[120,150,169,184],"task-specific":[121,160,187],"using":[124,200],"cross-attention":[127],"mechanism.":[128],"Then":[129],"on":[131,195],"embeddings,":[133],"Spatial":[137],"Feature":[138],"Aggregation":[139],"(SFA)":[140],"module":[141],"that":[142],"calculates":[143],"offsets":[144],"weights":[146],"aggregate":[148],"information":[149],"nearby":[151],"feature":[152,161],"points":[153],"at":[154,272],"location":[157],"maps.":[162],"Meanwhile,":[163],"also":[165,251],"generate":[166],"adjustment":[167],"parameters":[168],"finally":[174],"align":[175],"results":[178,250],"tasks":[182],"obtained":[183],"enhanced":[186],"described":[189],"above.":[190],"Extensive":[191],"experiments":[192],"conducted":[194],"MS-COCO":[197],"dataset.":[198],"When":[199],"ResNeXt-101-<inline-formula":[201],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[202,274],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">":[203],"<tex-math":[204],"notation=\"LaTeX\">$64\\times":[205],"4$":[206],"</tex-math></inline-formula>":[207],"d-DCN":[208],"as":[209],"backbone,":[211],"our":[212],"CTOD":[213,258],"achieves":[215],"result":[218],"51.8":[220],"AP":[221],"single-model":[223],"single-scale":[225],"testing,":[226],"outperforming":[227],"recently":[229],"proposed":[230],"ATSS,":[233],"VFNet,":[234],"LD":[235],"TOOD":[237],"4.1,":[239],"1.9,":[240],"1.3":[241],"0.7":[243],"AP,":[244],"respectively.":[245],"The":[246],"analysis":[247],"qualitative":[249],"illustrates":[252],"effectiveness":[254],"superiority":[256],"solving":[260],"detection.":[267],"Our":[268],"code":[269],"is":[270],"available":[271],"<uri":[273],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">https://github.com/Mr-Bigworth/CTOD</uri>.":[275]},"counts_by_year":[{"year":2025,"cited_by_count":10},{"year":2024,"cited_by_count":1}],"updated_date":"2026-02-25T08:12:03.925757","created_date":"2025-10-10T00:00:00"}
