{"id":"https://openalex.org/W4285207098","doi":"https://doi.org/10.1109/tcsvt.2022.3178144","title":"A Simple Visual-Textual Baseline for Pedestrian Attribute Recognition","display_name":"A Simple Visual-Textual Baseline for Pedestrian Attribute Recognition","publication_year":2022,"publication_date":"2022-05-26","ids":{"openalex":"https://openalex.org/W4285207098","doi":"https://doi.org/10.1109/tcsvt.2022.3178144"},"language":"en","primary_location":{"id":"doi:10.1109/tcsvt.2022.3178144","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2022.3178144","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5025934537","display_name":"Xinhua Cheng","orcid":"https://orcid.org/0000-0001-9034-279X"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xinhua Cheng","raw_affiliation_strings":["School of Electronic and Computer Engineering, Peking University Shenzhen Graduate School, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"School of Electronic and Computer Engineering, Peking University Shenzhen Graduate School, Shenzhen, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074366979","display_name":"Mengxi Jia","orcid":"https://orcid.org/0000-0002-0979-9803"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mengxi Jia","raw_affiliation_strings":["School of Electronic and Computer Engineering, Peking University Shenzhen Graduate School, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"School of Electronic and Computer Engineering, Peking University Shenzhen Graduate School, Shenzhen, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100391155","display_name":"Qian Wang","orcid":"https://orcid.org/0000-0003-2347-8798"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qian Wang","raw_affiliation_strings":["School of Electronic and Computer Engineering, Peking University Shenzhen Graduate School, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"School of Electronic and Computer Engineering, Peking University Shenzhen Graduate School, Shenzhen, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100409856","display_name":"Jian Zhang","orcid":"https://orcid.org/0000-0001-5486-3125"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]},{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jian Zhang","raw_affiliation_strings":["School of Electronic and Computer Engineering, Peking University Shenzhen Graduate School, Shenzhen, China","Peng Cheng Laboratory, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"School of Electronic and Computer Engineering, Peking University Shenzhen Graduate School, Shenzhen, China","institution_ids":["https://openalex.org/I20231570"]},{"raw_affiliation_string":"Peng Cheng Laboratory, Shenzhen, China","institution_ids":["https://openalex.org/I4210136793"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5025934537"],"corresponding_institution_ids":["https://openalex.org/I20231570"],"apc_list":null,"apc_paid":null,"fwci":4.381,"has_fulltext":false,"cited_by_count":48,"citation_normalized_percentile":{"value":0.95668194,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":"32","issue":"10","first_page":"6994","last_page":"7004"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9959999918937683,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8004444241523743},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.6754046678543091},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.6309465169906616},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.605017364025116},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.5591995120048523},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5026371479034424},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.4676154851913452},{"id":"https://openalex.org/keywords/pedestrian","display_name":"Pedestrian","score":0.4531838595867157},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4492928385734558},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.37977951765060425},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3452083170413971}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8004444241523743},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.6754046678543091},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.6309465169906616},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.605017364025116},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.5591995120048523},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5026371479034424},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.4676154851913452},{"id":"https://openalex.org/C2777113093","wikidata":"https://www.wikidata.org/wiki/Q221488","display_name":"Pedestrian","level":2,"score":0.4531838595867157},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4492928385734558},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.37977951765060425},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3452083170413971},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C22212356","wikidata":"https://www.wikidata.org/wiki/Q775325","display_name":"Transport engineering","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C111368507","wikidata":"https://www.wikidata.org/wiki/Q43518","display_name":"Oceanography","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcsvt.2022.3178144","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2022.3178144","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/1","display_name":"No poverty","score":0.49000000953674316}],"awards":[{"id":"https://openalex.org/G6497295135","display_name":null,"funder_award_id":"GXWD20201231165807007- 20200807164903001","funder_id":"https://openalex.org/F4320329791","funder_display_name":"Shenzhen Fundamental Research Program"}],"funders":[{"id":"https://openalex.org/F4320329791","display_name":"Shenzhen Fundamental Research Program","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":61,"referenced_works":["https://openalex.org/W1907729166","https://openalex.org/W2111025459","https://openalex.org/W2194775991","https://openalex.org/W2250539671","https://openalex.org/W2286727787","https://openalex.org/W2410968923","https://openalex.org/W2536626143","https://openalex.org/W2560096627","https://openalex.org/W2604463754","https://openalex.org/W2808154247","https://openalex.org/W2867270703","https://openalex.org/W2888471892","https://openalex.org/W2896249043","https://openalex.org/W2896457183","https://openalex.org/W2898491875","https://openalex.org/W2904764169","https://openalex.org/W2905439313","https://openalex.org/W2918609062","https://openalex.org/W2919631354","https://openalex.org/W2950160919","https://openalex.org/W2963365374","https://openalex.org/W2963557071","https://openalex.org/W2963790258","https://openalex.org/W2964248351","https://openalex.org/W2965153332","https://openalex.org/W2968124245","https://openalex.org/W2970231061","https://openalex.org/W2970776387","https://openalex.org/W2981779267","https://openalex.org/W2986999591","https://openalex.org/W2987191133","https://openalex.org/W2998496429","https://openalex.org/W3002124928","https://openalex.org/W3002792304","https://openalex.org/W3013799809","https://openalex.org/W3024059432","https://openalex.org/W3026128218","https://openalex.org/W3034727271","https://openalex.org/W3034927684","https://openalex.org/W3090853722","https://openalex.org/W3103850820","https://openalex.org/W3106858249","https://openalex.org/W3112567928","https://openalex.org/W3124639310","https://openalex.org/W3174656328","https://openalex.org/W3181069167","https://openalex.org/W3187411329","https://openalex.org/W3187415662","https://openalex.org/W3194557739","https://openalex.org/W3200445214","https://openalex.org/W4283816693","https://openalex.org/W6631190155","https://openalex.org/W6636510571","https://openalex.org/W6638667902","https://openalex.org/W6684191040","https://openalex.org/W6739901393","https://openalex.org/W6754364688","https://openalex.org/W6760307120","https://openalex.org/W6768438993","https://openalex.org/W6777516870","https://openalex.org/W6789753369"],"related_works":["https://openalex.org/W2275988210","https://openalex.org/W2961085424","https://openalex.org/W4306674287","https://openalex.org/W4224009465","https://openalex.org/W2385621972","https://openalex.org/W2357241418","https://openalex.org/W4286629047","https://openalex.org/W4306321456","https://openalex.org/W4285260836","https://openalex.org/W3046775127"],"abstract_inverted_index":{"Pedestrian":[0],"attribute":[1,66,105,120,143],"recognition":[2,135],"(PAR),":[3],"which":[4,92],"aims":[5],"to":[6,21,47,52,98,138,190,196],"identify":[7],"attributes":[8,46],"of":[9,25,55,112,142,186],"the":[10,22,53,61,100,140,151,184],"pedestrians":[11],"captured":[12],"in":[13,71,155,210],"video":[14],"surveillance,":[15],"is":[16,68,194,216],"a":[17,39,85,199],"challenging":[18],"task":[19],"due":[20],"poor":[23],"quality":[24],"images":[26,118],"and":[27,43,119,124,133,160,181,203],"diverse":[28],"spatial":[29],"distribution":[30],"among":[31,65],"attributes.":[32],"Existing":[33],"methods":[34,173],"usually":[35],"model":[36],"PAR":[37,91,201,212],"as":[38,150,198],"multi-label":[40],"classification":[41],"problem":[42],"manually":[44],"map":[45],"an":[48,94],"ordered":[49],"list":[50],"corresponding":[51],"outputs":[54],"classifiers":[56],"or":[57],"sequential":[58],"models.":[59],"However,":[60],"inherent":[62],"textual":[63,96,101,109,125,188],"information":[64,130],"annotations":[67,106,121],"largely":[69],"neglected":[70],"these":[72],"visual-only":[73,172],"methods.":[74],"In":[75],"this":[76,81],"paper,":[77],"we":[78,146],"first":[79],"alleviate":[80],"issue":[82],"by":[83,107],"proposing":[84],"novel":[86],"visual-textual":[87],"baseline":[88,202],"(VTB)":[89],"for":[90,157,207],"introduces":[93],"additional":[95],"modality":[97,189],"explore":[99],"semantic":[102],"correlations":[103,162],"from":[104],"pre-trained":[108],"encoders":[110],"instead":[111],"human":[113],"definitions.":[114],"VTB":[115,156],"encodes":[116],"pedestrian":[117],"into":[122],"visual":[123],"features":[126],"respectively,":[127],"interacts":[128],"with":[129],"across":[131],"modalities,":[132],"predicts":[134],"results":[136],"independently":[137],"remove":[139],"influence":[141],"orders.":[144],"Furthermore,":[145],"introduce":[147],"transformer":[148],"encoder":[149],"cross-modal":[152,161],"fusion":[153,209],"module":[154],"sufficient":[158],"intra-modal":[159],"exploration.":[163],"Our":[164,192,214],"method":[165,193],"achieves":[166],"superior":[167],"performance":[168],"over":[169],"most":[170],"existing":[171],"on":[174],"two":[175],"widely":[176],"used":[177],"datasets":[178],"including":[179],"RAP":[180],"PA-100K,":[182],"demonstrating":[183],"effectiveness":[185],"utilizing":[187],"PAR.":[191],"expected":[195],"serve":[197],"multimodal":[200,208],"inspire":[204],"new":[205],"insights":[206],"future":[211],"research.":[213],"code":[215],"available":[217],"at":[218],"<uri":[219],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[220],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">https://github.com/cxh0519/VTB</uri>":[221],".":[222]},"counts_by_year":[{"year":2026,"cited_by_count":5},{"year":2025,"cited_by_count":18},{"year":2024,"cited_by_count":14},{"year":2023,"cited_by_count":9},{"year":2022,"cited_by_count":2}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
