{"id":"https://openalex.org/W4405844902","doi":"https://doi.org/10.1109/tmm.2024.3521658","title":"GPT4Ego: Unleashing the Potential of Pre-Trained Models for Zero-Shot Egocentric Action Recognition","display_name":"GPT4Ego: Unleashing the Potential of Pre-Trained Models for Zero-Shot Egocentric Action Recognition","publication_year":2024,"publication_date":"2024-12-27","ids":{"openalex":"https://openalex.org/W4405844902","doi":"https://doi.org/10.1109/tmm.2024.3521658"},"language":"en","primary_location":{"id":"doi:10.1109/tmm.2024.3521658","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2024.3521658","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5016663057","display_name":"Guangzhao Dai","orcid":"https://orcid.org/0000-0003-4111-9334"},"institutions":[{"id":"https://openalex.org/I36399199","display_name":"Nanjing University of Science and Technology","ror":"https://ror.org/00xp9wg62","country_code":"CN","type":"education","lineage":["https://openalex.org/I36399199"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Guangzhao Dai","raw_affiliation_strings":["School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, China","institution_ids":["https://openalex.org/I36399199"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040437528","display_name":"Xiangbo Shu","orcid":"https://orcid.org/0000-0003-4902-4663"},"institutions":[{"id":"https://openalex.org/I36399199","display_name":"Nanjing University of Science and Technology","ror":"https://ror.org/00xp9wg62","country_code":"CN","type":"education","lineage":["https://openalex.org/I36399199"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiangbo Shu","raw_affiliation_strings":["School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, China","institution_ids":["https://openalex.org/I36399199"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113395448","display_name":"Wenhao Wu","orcid":null},"institutions":[{"id":"https://openalex.org/I129604602","display_name":"The University of Sydney","ror":"https://ror.org/0384j8v12","country_code":"AU","type":"education","lineage":["https://openalex.org/I129604602"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Wenhao Wu","raw_affiliation_strings":["School of Computer Science, The University of Sydney, Sydney NSW, Australia","School of Computer Science, The University of Sydney, NSW, Australia"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, The University of Sydney, Sydney NSW, Australia","institution_ids":["https://openalex.org/I129604602"]},{"raw_affiliation_string":"School of Computer Science, The University of Sydney, NSW, Australia","institution_ids":["https://openalex.org/I129604602"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100716371","display_name":"Rui Yan","orcid":"https://orcid.org/0000-0002-0694-9458"},"institutions":[{"id":"https://openalex.org/I36399199","display_name":"Nanjing University of Science and Technology","ror":"https://ror.org/00xp9wg62","country_code":"CN","type":"education","lineage":["https://openalex.org/I36399199"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Rui Yan","raw_affiliation_strings":["Department of Computer Science and Technology, Nanjing University, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Technology, Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I36399199"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102020945","display_name":"Jiachao Zhang","orcid":"https://orcid.org/0000-0002-3124-9461"},"institutions":[{"id":"https://openalex.org/I2799736854","display_name":"Nanjing Institute of Technology","ror":"https://ror.org/00n6txq60","country_code":"CN","type":"education","lineage":["https://openalex.org/I2799736854"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiachao Zhang","raw_affiliation_strings":["Artificial Intelligence Industrial Technology Research Institute, Nanjing Institute of Technology, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"Artificial Intelligence Industrial Technology Research Institute, Nanjing Institute of Technology, Nanjing, China","institution_ids":["https://openalex.org/I2799736854"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5016663057"],"corresponding_institution_ids":["https://openalex.org/I36399199"],"apc_list":null,"apc_paid":null,"fwci":1.7124,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.86352039,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":"27","issue":null,"first_page":"401","last_page":"413"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9664000272750854,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.9366999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7824829816818237},{"id":"https://openalex.org/keywords/action-recognition","display_name":"Action recognition","score":0.6146956086158752},{"id":"https://openalex.org/keywords/zero","display_name":"Zero (linguistics)","score":0.5880616307258606},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5615593194961548},{"id":"https://openalex.org/keywords/shot","display_name":"Shot (pellet)","score":0.5266901254653931},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.46039101481437683},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.4331203103065491},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.4192246198654175},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.08650761842727661}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7824829816818237},{"id":"https://openalex.org/C2987834672","wikidata":"https://www.wikidata.org/wiki/Q4677630","display_name":"Action recognition","level":3,"score":0.6146956086158752},{"id":"https://openalex.org/C2780813799","wikidata":"https://www.wikidata.org/wiki/Q3274237","display_name":"Zero (linguistics)","level":2,"score":0.5880616307258606},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5615593194961548},{"id":"https://openalex.org/C2778344882","wikidata":"https://www.wikidata.org/wiki/Q278938","display_name":"Shot (pellet)","level":2,"score":0.5266901254653931},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.46039101481437683},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.4331203103065491},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4192246198654175},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.08650761842727661},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tmm.2024.3521658","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2024.3521658","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities","score":0.44999998807907104}],"awards":[{"id":"https://openalex.org/G3636126384","display_name":null,"funder_award_id":"61932020","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5512729199","display_name":null,"funder_award_id":"62222207","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5558408212","display_name":null,"funder_award_id":"62072245","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5950360714","display_name":null,"funder_award_id":"BK20211520","funder_id":"https://openalex.org/F4320322769","funder_display_name":"Natural Science Foundation of Jiangsu Province"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322769","display_name":"Natural Science Foundation of Jiangsu Province","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":77,"referenced_works":["https://openalex.org/W2050398567","https://openalex.org/W2895299763","https://openalex.org/W2955874753","https://openalex.org/W2961553857","https://openalex.org/W2963082988","https://openalex.org/W2963524571","https://openalex.org/W2974686944","https://openalex.org/W2976669726","https://openalex.org/W2982084422","https://openalex.org/W2997004687","https://openalex.org/W3048602385","https://openalex.org/W3096719817","https://openalex.org/W3126721948","https://openalex.org/W3163585755","https://openalex.org/W3203574385","https://openalex.org/W3203711169","https://openalex.org/W3205786327","https://openalex.org/W3205959870","https://openalex.org/W3206930349","https://openalex.org/W3207758636","https://openalex.org/W4205924026","https://openalex.org/W4210915468","https://openalex.org/W4226192497","https://openalex.org/W4284681282","https://openalex.org/W4292387248","https://openalex.org/W4309368547","https://openalex.org/W4312614039","https://openalex.org/W4312710666","https://openalex.org/W4319993407","https://openalex.org/W4362653498","https://openalex.org/W4378373476","https://openalex.org/W4382467086","https://openalex.org/W4382467684","https://openalex.org/W4385245566","https://openalex.org/W4386065251","https://openalex.org/W4386065565","https://openalex.org/W4386066015","https://openalex.org/W4386076314","https://openalex.org/W4386083063","https://openalex.org/W4387969042","https://openalex.org/W4387973771","https://openalex.org/W4388331220","https://openalex.org/W4390872505","https://openalex.org/W4390873954","https://openalex.org/W4390874575","https://openalex.org/W4391547509","https://openalex.org/W4393147967","https://openalex.org/W4393178525","https://openalex.org/W4397026482","https://openalex.org/W4402726948","https://openalex.org/W4402778186","https://openalex.org/W4405566635","https://openalex.org/W6750355821","https://openalex.org/W6753924131","https://openalex.org/W6766978945","https://openalex.org/W6767624375","https://openalex.org/W6778883912","https://openalex.org/W6791353385","https://openalex.org/W6801567822","https://openalex.org/W6810081322","https://openalex.org/W6810334672","https://openalex.org/W6840478680","https://openalex.org/W6846313647","https://openalex.org/W6846867676","https://openalex.org/W6849177959","https://openalex.org/W6849990444","https://openalex.org/W6850625674","https://openalex.org/W6851592950","https://openalex.org/W6851950068","https://openalex.org/W6853165786","https://openalex.org/W6853242345","https://openalex.org/W6857461731","https://openalex.org/W6857822741","https://openalex.org/W6857901785","https://openalex.org/W6858258359","https://openalex.org/W6858765753","https://openalex.org/W6859273674"],"related_works":["https://openalex.org/W2074502265","https://openalex.org/W4214877189","https://openalex.org/W2773965352","https://openalex.org/W2381179799","https://openalex.org/W2980279061","https://openalex.org/W2334685461","https://openalex.org/W2366718574","https://openalex.org/W2359774528","https://openalex.org/W1576128429","https://openalex.org/W2269464716"],"abstract_inverted_index":{"Vision-Language":[0],"Models":[1],"(VLMs),":[2],"pre-trained":[3,256,273],"on":[4,84,199],"large-scale":[5,201],"datasets,":[6],"have":[7],"shown":[8],"impressive":[9],"performance":[10,23],"in":[11,24,43,91],"various":[12],"visual":[13],"recognition":[14],"tasks.":[15],"This":[16],"advancement":[17,253],"paves":[18],"the":[19,85,113,187,238,252,265],"way":[20],"for":[21,74],"notable":[22],"some":[25],"egocentric":[26,92,202,266],"tasks,":[27],"Zero-Shot":[28],"Egocentric":[29],"Action":[30],"Recognition":[31],"(ZS-EAR),":[32],"entailing":[33],"VLMs":[34,49,198],"zero-shot":[35],"to":[36,61,111,149,182,268],"recognize":[37],"actions":[38],"from":[39,237],"first-person":[40],"videos":[41],"enriched":[42],"more":[44,270],"realistic":[45],"human-environment":[46],"interactions.":[47],"Typically,":[48],"handle":[50],"ZS-EAR":[51,75],"as":[52],"a":[53,71,99,128,163],"global":[54],"video-text":[55],"matching":[56],"task,":[57],"which":[58,138],"often":[59],"leads":[60],"suboptimal":[62],"alignment":[63,81,115],"of":[64,116,189,241,254],"vision":[65,121],"and":[66,88,118,122,224,244],"linguistic":[67],"knowledge.":[68],"We":[69,259],"propose":[70,127],"refined":[72],"approach":[73],"using":[76],"VLMs,":[77],"emphasizing":[78],"fine-grained":[79,114,242],"concept-description":[80],"that":[82,173],"capitalizes":[83],"rich":[86],"semantic":[87],"contextual":[89,151,184],"details":[90],"videos.":[93],"In":[94,234],"this":[95,261],"work,":[96],"we":[97,125,161],"introduce":[98],"straightforward":[100],"yet":[101],"remarkably":[102],"potent":[103],"VLM":[104],"framework,":[105],"<italic":[106],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[107,134,169,208,211,217,220,227,230],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">aka</i>":[108],"GPT4Ego,":[109],"designed":[110],"enhance":[112],"concept":[117,243],"description":[119,245],"between":[120],"language.":[123],"Specifically,":[124],"first":[126],"new":[129,164],"Ego-oriented":[130,165],"Text":[131],"Prompting":[132],"(EgoTP<inline-formula":[133],"xmlns:xlink=\"http://www.w3.org/1999/xlink\"><tex-math":[135,170,209,212,218,221,228,231],"notation=\"LaTeX\">$\\spadesuit$</tex-math></inline-formula>)":[136],"scheme,":[137],"effectively":[139],"prompts":[140],"action-related":[141,175],"text-contextual":[142],"semantics":[143,177],"by":[144,153,178],"evolving":[145],"word-level":[146],"class":[147],"names":[148],"sentence-level":[150],"descriptions":[152],"ChatGPT":[154],"with":[155,186,251],"well-designed":[156],"chain-of-thought":[157],"textual":[158],"prompts.":[159],"Moreover,":[160],"design":[162],"Visual":[166],"Parsing":[167],"(EgoVP<inline-formula":[168],"notation=\"LaTeX\">$\\clubsuit$</tex-math></inline-formula>)":[171],"strategy":[172],"learns":[174],"vision-contextual":[176],"refining":[179],"global-level":[180],"images":[181],"part-level":[183],"concepts":[185],"help":[188],"SAM.":[190],"Extensive":[191],"experiments":[192],"demonstrate":[193],"GPT4Ego":[194,247],"significantly":[195],"outperforms":[196],"existing":[197],"three":[200],"video":[203],"benchmarks,":[204],"i.e.,":[205],"EPIC-KITCHENS-100":[206],"(33.2%<inline-formula":[207],"notation=\"LaTeX\">$\\uparrow$</tex-math></inline-formula><inline-formula":[210,219,229],"notation=\"LaTeX\">$_{\\bm":[213,222,232],"{+9.4}}$</tex-math></inline-formula>),":[214],"EGTEA":[215],"(39.6%<inline-formula":[216],"{+5.5}}$</tex-math></inline-formula>),":[223],"CharadesEgo":[225],"(31.5%<inline-formula":[226],"{+2.6}}$</tex-math></inline-formula>).":[233],"addition,":[235],"benefiting":[236],"novel":[239],"mechanism":[240],"alignment,":[246],"can":[248,263],"sustainably":[249],"evolve":[250],"ever-growing":[255],"foundational":[257],"models.":[258,275],"hope":[260],"work":[262],"encourage":[264],"community":[267],"build":[269],"investigation":[271],"into":[272],"vision-language":[274]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":4}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
