{"id":"https://openalex.org/W4391547496","doi":"https://doi.org/10.1109/tpami.2024.3362288","title":"Multi-Task Learning of Object States and State-Modifying Actions From Web Videos","display_name":"Multi-Task Learning of Object States and State-Modifying Actions From Web Videos","publication_year":2024,"publication_date":"2024-02-05","ids":{"openalex":"https://openalex.org/W4391547496","doi":"https://doi.org/10.1109/tpami.2024.3362288","pmid":"https://pubmed.ncbi.nlm.nih.gov/38315606"},"language":"en","primary_location":{"id":"doi:10.1109/tpami.2024.3362288","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2024.3362288","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5114549498","display_name":"Tom\u00e1\u0161 Sou\u010dek","orcid":"https://orcid.org/0000-0001-6911-5517"},"institutions":[{"id":"https://openalex.org/I44504214","display_name":"Czech Technical University in Prague","ror":"https://ror.org/03kqpb082","country_code":"CZ","type":"education","lineage":["https://openalex.org/I44504214"]}],"countries":["CZ"],"is_corresponding":true,"raw_author_name":"Tom\u00e1\u0161 Sou\u010dek","raw_affiliation_strings":["Czech Institute of Informatics, Robotics and Cybernetics, Czech Technical University, Prague, Czechia"],"affiliations":[{"raw_affiliation_string":"Czech Institute of Informatics, Robotics and Cybernetics, Czech Technical University, Prague, Czechia","institution_ids":["https://openalex.org/I44504214"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012826868","display_name":"Jean-Baptiste Alayrac","orcid":"https://orcid.org/0000-0002-3071-4157"},"institutions":[{"id":"https://openalex.org/I4210090411","display_name":"Google DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Jean-Baptiste Alayrac","raw_affiliation_strings":["DeepMind, London, U.K","DeepMind, United Kingdom"],"affiliations":[{"raw_affiliation_string":"DeepMind, London, U.K","institution_ids":["https://openalex.org/I4210090411"]},{"raw_affiliation_string":"DeepMind, United Kingdom","institution_ids":["https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021766835","display_name":"Antoine Miech","orcid":"https://orcid.org/0000-0001-6657-7812"},"institutions":[{"id":"https://openalex.org/I4210090411","display_name":"Google DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Antoine Miech","raw_affiliation_strings":["DeepMind, London, U.K","DeepMind, United Kingdom"],"affiliations":[{"raw_affiliation_string":"DeepMind, London, U.K","institution_ids":["https://openalex.org/I4210090411"]},{"raw_affiliation_string":"DeepMind, United Kingdom","institution_ids":["https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087781064","display_name":"Ivan Laptev","orcid":"https://orcid.org/0000-0001-7072-3325"},"institutions":[{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"government","lineage":["https://openalex.org/I1294671590"]},{"id":"https://openalex.org/I2746051580","display_name":"Universit\u00e9 Paris Sciences et Lettres","ror":"https://ror.org/013cjyk83","country_code":"FR","type":"education","lineage":["https://openalex.org/I2746051580"]},{"id":"https://openalex.org/I29607241","display_name":"\u00c9cole Normale Sup\u00e9rieure - PSL","ror":"https://ror.org/05a0dhs15","country_code":"FR","type":"other","lineage":["https://openalex.org/I2746051580","https://openalex.org/I29607241"]},{"id":"https://openalex.org/I4210161954","display_name":"D\u00e9partement d'Informatique","ror":"https://ror.org/05y6rqs46","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I1294671590","https://openalex.org/I1326498283","https://openalex.org/I2746051580","https://openalex.org/I29607241","https://openalex.org/I4210159245","https://openalex.org/I4210161954"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Ivan Laptev","raw_affiliation_strings":["Inria and D&#x00E9;partement d&#x2019;informatique de l&#x2019;ENS, &#x00C9;cole normale sup&#x00E9;rieure, CNRS, PSL Research University, Paris, France"],"affiliations":[{"raw_affiliation_string":"Inria and D&#x00E9;partement d&#x2019;informatique de l&#x2019;ENS, &#x00C9;cole normale sup&#x00E9;rieure, CNRS, PSL Research University, Paris, France","institution_ids":["https://openalex.org/I29607241","https://openalex.org/I4210161954","https://openalex.org/I2746051580","https://openalex.org/I1294671590"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5025398783","display_name":"Josef \u0160ivic","orcid":"https://orcid.org/0000-0002-2554-5301"},"institutions":[{"id":"https://openalex.org/I44504214","display_name":"Czech Technical University in Prague","ror":"https://ror.org/03kqpb082","country_code":"CZ","type":"education","lineage":["https://openalex.org/I44504214"]}],"countries":["CZ"],"is_corresponding":false,"raw_author_name":"Josef Sivic","raw_affiliation_strings":["Czech Institute of Informatics, Robotics and Cybernetics, Czech Technical University, Prague, Czechia"],"affiliations":[{"raw_affiliation_string":"Czech Institute of Informatics, Robotics and Cybernetics, Czech Technical University, Prague, Czechia","institution_ids":["https://openalex.org/I44504214"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5114549498"],"corresponding_institution_ids":["https://openalex.org/I44504214"],"apc_list":null,"apc_paid":null,"fwci":1.9313,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.86354317,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":"46","issue":"7","first_page":"5114","last_page":"5130"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7495450973510742},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6527508497238159},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5947089195251465},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.48585864901542664},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4079326093196869},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.36664754152297974},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.08638998866081238}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7495450973510742},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6527508497238159},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5947089195251465},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.48585864901542664},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4079326093196869},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.36664754152297974},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.08638998866081238},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tpami.2024.3362288","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2024.3362288","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},{"id":"pmid:38315606","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/38315606","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on pattern analysis and machine intelligence","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.4099999964237213,"display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G1076216274","display_name":null,"funder_award_id":"CZ.02.1.01/0.0/0.0/15_003/0000468","funder_id":"https://openalex.org/F4320335322","funder_display_name":"European Regional Development Fund"}],"funders":[{"id":"https://openalex.org/F4320320883","display_name":"Agence Nationale de la Recherche","ror":"https://ror.org/00rbzpz17"},{"id":"https://openalex.org/F4320335322","display_name":"European Regional Development Fund","ror":"https://ror.org/00k4n6c32"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":104,"referenced_works":["https://openalex.org/W24089286","https://openalex.org/W92662927","https://openalex.org/W262578090","https://openalex.org/W1773149199","https://openalex.org/W1926645898","https://openalex.org/W1948251820","https://openalex.org/W1967686239","https://openalex.org/W2097683810","https://openalex.org/W2099614498","https://openalex.org/W2108710284","https://openalex.org/W2126579184","https://openalex.org/W2321611732","https://openalex.org/W2425121537","https://openalex.org/W2507009361","https://openalex.org/W2514262209","https://openalex.org/W2563717578","https://openalex.org/W2594270457","https://openalex.org/W2606473278","https://openalex.org/W2736809457","https://openalex.org/W2755876276","https://openalex.org/W2768591600","https://openalex.org/W2777926144","https://openalex.org/W2795187948","https://openalex.org/W2796214461","https://openalex.org/W2799087757","https://openalex.org/W2888166343","https://openalex.org/W2889167066","https://openalex.org/W2913340405","https://openalex.org/W2914699769","https://openalex.org/W2952132648","https://openalex.org/W2955874753","https://openalex.org/W2957775769","https://openalex.org/W2960747818","https://openalex.org/W2962795934","https://openalex.org/W2962824366","https://openalex.org/W2962916463","https://openalex.org/W2963430933","https://openalex.org/W2963524571","https://openalex.org/W2963642716","https://openalex.org/W2963854351","https://openalex.org/W2963877604","https://openalex.org/W2964094654","https://openalex.org/W2964185501","https://openalex.org/W2972073579","https://openalex.org/W2975813532","https://openalex.org/W2982335217","https://openalex.org/W2983918066","https://openalex.org/W2984008963","https://openalex.org/W2986385672","https://openalex.org/W2990503944","https://openalex.org/W2997591727","https://openalex.org/W3009622574","https://openalex.org/W3034257141","https://openalex.org/W3034263732","https://openalex.org/W3034572008","https://openalex.org/W3034623254","https://openalex.org/W3034862692","https://openalex.org/W3035467150","https://openalex.org/W3035635319","https://openalex.org/W3047467323","https://openalex.org/W3092639633","https://openalex.org/W3103717557","https://openalex.org/W3106041614","https://openalex.org/W3106768499","https://openalex.org/W3123394884","https://openalex.org/W3126721948","https://openalex.org/W3143320354","https://openalex.org/W3158711590","https://openalex.org/W3168371806","https://openalex.org/W3174587031","https://openalex.org/W3177173029","https://openalex.org/W3189379416","https://openalex.org/W3202074654","https://openalex.org/W3203711169","https://openalex.org/W3205786327","https://openalex.org/W3207732590","https://openalex.org/W3211316995","https://openalex.org/W4214555767","https://openalex.org/W4214614183","https://openalex.org/W4214833734","https://openalex.org/W4287828128","https://openalex.org/W4297950672","https://openalex.org/W4312508659","https://openalex.org/W4312539810","https://openalex.org/W4312658081","https://openalex.org/W4312864639","https://openalex.org/W4313171390","https://openalex.org/W4313174527","https://openalex.org/W4320086121","https://openalex.org/W6600983433","https://openalex.org/W6682864246","https://openalex.org/W6747808108","https://openalex.org/W6749916090","https://openalex.org/W6757817989","https://openalex.org/W6774728187","https://openalex.org/W6780196116","https://openalex.org/W6780294235","https://openalex.org/W6781586772","https://openalex.org/W6789619165","https://openalex.org/W6790019176","https://openalex.org/W6790307280","https://openalex.org/W6791353385","https://openalex.org/W6843825870","https://openalex.org/W6849990444"],"related_works":["https://openalex.org/W2961085424","https://openalex.org/W4306674287","https://openalex.org/W2772917594","https://openalex.org/W2036807459","https://openalex.org/W2058170566","https://openalex.org/W2755342338","https://openalex.org/W2166024367","https://openalex.org/W3116076068","https://openalex.org/W2229312674","https://openalex.org/W2951359407"],"abstract_inverted_index":{"We":[0,26,128,168],"aim":[1],"to":[2,4],"learn":[3],"temporally":[5],"localize":[6],"object":[7,46,68,94,126],"state":[8,69],"changes":[9,124],"and":[10,83,96,102,121,184,189],"the":[11,44,55,62,181],"corresponding":[12,45],"state-modifying":[13,40],"actions":[14,41],"by":[15,61],"observing":[16],"people":[17],"interacting":[18],"with":[19,43,114],"objects":[20],"in":[21],"long":[22,151],"uncurated":[23,50,152],"web":[24,153],"videos.":[25],"introduce":[27],"three":[28],"principal":[29],"contributions.":[30],"First,":[31],"we":[32,77,107],"develop":[33],"a":[34,85,109,175],"self-supervised":[35,60],"model":[36,58,86,173],"for":[37,193],"jointly":[38],"learning":[39,91],"together":[42],"states":[47,95],"from":[48,54],"an":[49,132],"set":[51],"of":[52,92,119,125,148,150,178],"videos":[53,154],"Internet.":[56],"The":[57],"is":[59],"causal":[63],"ordering":[64],"signal,":[65],"i.e.,":[66],"initial":[67],"\u2192":[70,73],"manipulating":[71],"action":[72],"end":[74],"state.":[75],"Second,":[76],"explore":[78],"alternative":[79],"multi-task":[80,172],"network":[81],"architectures":[82],"identify":[84],"that":[87,170],"enables":[88],"efficient":[89],"joint":[90],"multiple":[93],"actions,":[97],"such":[98,158],"as":[99,138,140,159],"pouring":[100,103],"water":[101],"coffee,":[104],"together.":[105],"Third,":[106],"collect":[108],"new":[110,142],"dataset,":[111],"named":[112],"ChangeIt,":[113],"more":[115],"than":[116],"2600":[117],"hours":[118],"video":[120,135],"34":[122],"thousand":[123],"states.":[127],"report":[129],"results":[130],"on":[131],"existing":[133],"instructional":[134],"dataset":[136,145],"COIN":[137],"well":[139],"our":[141,171],"large-scale":[143],"ChangeIt":[144],"containing":[146],"tens":[147],"thousands":[149],"depicting":[155],"various":[156],"interactions":[157],"hole":[160],"drilling,":[161],"cream":[162],"whisking,":[163],"or":[164],"paper":[165],"plane":[166],"folding.":[167],"show":[169],"achieves":[174],"relative":[176],"improvement":[177],"40%":[179],"over":[180],"prior":[182],"methods":[183],"significantly":[185],"outperforms":[186],"both":[187],"image-based":[188],"video-based":[190],"zero-shot":[191],"models":[192],"this":[194],"problem.":[195]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":3}],"updated_date":"2026-04-18T07:56:08.524223","created_date":"2025-10-10T00:00:00"}
