{"id":"https://openalex.org/W4312415723","doi":"https://doi.org/10.1109/icpr56361.2022.9956607","title":"Cross-modal Contrastive Distillation for Instructional Activity Anticipation","display_name":"Cross-modal Contrastive Distillation for Instructional Activity Anticipation","publication_year":2022,"publication_date":"2022-08-21","ids":{"openalex":"https://openalex.org/W4312415723","doi":"https://doi.org/10.1109/icpr56361.2022.9956607"},"language":"en","primary_location":{"id":"doi:10.1109/icpr56361.2022.9956607","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icpr56361.2022.9956607","pdf_url":null,"source":{"id":"https://openalex.org/S4363607731","display_name":"2022 26th International Conference on Pattern Recognition (ICPR)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 26th International Conference on Pattern Recognition (ICPR)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5050209478","display_name":"Zhengyuan Yang","orcid":"https://orcid.org/0000-0002-5808-0889"},"institutions":[{"id":"https://openalex.org/I5388228","display_name":"University of Rochester","ror":"https://ror.org/022kthw22","country_code":"US","type":"education","lineage":["https://openalex.org/I5388228"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Zhengyuan Yang","raw_affiliation_strings":["University of Rochester,Department of Computer Science","Department of Computer Science, University of Rochester"],"affiliations":[{"raw_affiliation_string":"University of Rochester,Department of Computer Science","institution_ids":["https://openalex.org/I5388228"]},{"raw_affiliation_string":"Department of Computer Science, University of Rochester","institution_ids":["https://openalex.org/I5388228"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032552506","display_name":"Jingen Liu","orcid":"https://orcid.org/0000-0003-3133-3644"},"institutions":[{"id":"https://openalex.org/I72427458","display_name":"JDSU (United States)","ror":"https://ror.org/01a5v8x09","country_code":"US","type":"company","lineage":["https://openalex.org/I72427458"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jingen Liu","raw_affiliation_strings":["JD AI Research"],"affiliations":[{"raw_affiliation_string":"JD AI Research","institution_ids":["https://openalex.org/I72427458"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050624362","display_name":"Jing Huang","orcid":"https://orcid.org/0000-0001-9301-9410"},"institutions":[{"id":"https://openalex.org/I72427458","display_name":"JDSU (United States)","ror":"https://ror.org/01a5v8x09","country_code":"US","type":"company","lineage":["https://openalex.org/I72427458"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jing Huang","raw_affiliation_strings":["JD AI Research"],"affiliations":[{"raw_affiliation_string":"JD AI Research","institution_ids":["https://openalex.org/I72427458"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101727205","display_name":"Xiaodong He","orcid":"https://orcid.org/0000-0002-9463-9168"},"institutions":[{"id":"https://openalex.org/I72427458","display_name":"JDSU (United States)","ror":"https://ror.org/01a5v8x09","country_code":"US","type":"company","lineage":["https://openalex.org/I72427458"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xiaodong He","raw_affiliation_strings":["JD AI Research"],"affiliations":[{"raw_affiliation_string":"JD AI Research","institution_ids":["https://openalex.org/I72427458"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017597537","display_name":"Tao Mei","orcid":"https://orcid.org/0000-0003-2497-7732"},"institutions":[{"id":"https://openalex.org/I72427458","display_name":"JDSU (United States)","ror":"https://ror.org/01a5v8x09","country_code":"US","type":"company","lineage":["https://openalex.org/I72427458"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tao Mei","raw_affiliation_strings":["JD AI Research"],"affiliations":[{"raw_affiliation_string":"JD AI Research","institution_ids":["https://openalex.org/I72427458"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064805926","display_name":"Chenliang Xu","orcid":"https://orcid.org/0000-0002-2183-822X"},"institutions":[{"id":"https://openalex.org/I5388228","display_name":"University of Rochester","ror":"https://ror.org/022kthw22","country_code":"US","type":"education","lineage":["https://openalex.org/I5388228"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chenliang Xu","raw_affiliation_strings":["University of Rochester,Department of Computer Science","Department of Computer Science, University of Rochester"],"affiliations":[{"raw_affiliation_string":"University of Rochester,Department of Computer Science","institution_ids":["https://openalex.org/I5388228"]},{"raw_affiliation_string":"Department of Computer Science, University of Rochester","institution_ids":["https://openalex.org/I5388228"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5055469774","display_name":"Jiebo Luo","orcid":"https://orcid.org/0000-0002-4516-9729"},"institutions":[{"id":"https://openalex.org/I5388228","display_name":"University of Rochester","ror":"https://ror.org/022kthw22","country_code":"US","type":"education","lineage":["https://openalex.org/I5388228"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jiebo Luo","raw_affiliation_strings":["University of Rochester,Department of Computer Science","Department of Computer Science, University of Rochester"],"affiliations":[{"raw_affiliation_string":"University of Rochester,Department of Computer Science","institution_ids":["https://openalex.org/I5388228"]},{"raw_affiliation_string":"Department of Computer Science, University of Rochester","institution_ids":["https://openalex.org/I5388228"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5050209478"],"corresponding_institution_ids":["https://openalex.org/I5388228"],"apc_list":null,"apc_paid":null,"fwci":0.2996,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.64237844,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"5002","last_page":"5009"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7934340834617615},{"id":"https://openalex.org/keywords/anticipation","display_name":"Anticipation (artificial intelligence)","score":0.7136547565460205},{"id":"https://openalex.org/keywords/margin","display_name":"Margin (machine learning)","score":0.6516493558883667},{"id":"https://openalex.org/keywords/distillation","display_name":"Distillation","score":0.6041631102561951},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6040043830871582},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5590316653251648},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.5394557118415833},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5224663019180298},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4865783452987671},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.486378937959671},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.4711571931838989},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4109892249107361},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.3542299270629883},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.09322869777679443}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7934340834617615},{"id":"https://openalex.org/C176777502","wikidata":"https://www.wikidata.org/wiki/Q4774623","display_name":"Anticipation (artificial intelligence)","level":2,"score":0.7136547565460205},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.6516493558883667},{"id":"https://openalex.org/C204030448","wikidata":"https://www.wikidata.org/wiki/Q101017","display_name":"Distillation","level":2,"score":0.6041631102561951},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6040043830871582},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5590316653251648},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.5394557118415833},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5224663019180298},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4865783452987671},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.486378937959671},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.4711571931838989},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4109892249107361},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3542299270629883},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.09322869777679443},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icpr56361.2022.9956607","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icpr56361.2022.9956607","pdf_url":null,"source":{"id":"https://openalex.org/S4363607731","display_name":"2022 26th International Conference on Pattern Recognition (ICPR)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 26th International Conference on Pattern Recognition (ICPR)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":66,"referenced_works":["https://openalex.org/W753847829","https://openalex.org/W1522301498","https://openalex.org/W1690739335","https://openalex.org/W1821462560","https://openalex.org/W1905882502","https://openalex.org/W2016776918","https://openalex.org/W2139501017","https://openalex.org/W2147615062","https://openalex.org/W2185953016","https://openalex.org/W2194775991","https://openalex.org/W2198577854","https://openalex.org/W2422305492","https://openalex.org/W2424778531","https://openalex.org/W2561238782","https://openalex.org/W2592335154","https://openalex.org/W2737041163","https://openalex.org/W2774267535","https://openalex.org/W2795410839","https://openalex.org/W2885024018","https://openalex.org/W2889796598","https://openalex.org/W2905385096","https://openalex.org/W2945792291","https://openalex.org/W2955189650","https://openalex.org/W2963351113","https://openalex.org/W2963570630","https://openalex.org/W2963888093","https://openalex.org/W2981635073","https://openalex.org/W2981694290","https://openalex.org/W2988823324","https://openalex.org/W2993447238","https://openalex.org/W2995607862","https://openalex.org/W2997591391","https://openalex.org/W3020843970","https://openalex.org/W3034679267","https://openalex.org/W3035323998","https://openalex.org/W3035365026","https://openalex.org/W3035392611","https://openalex.org/W3084937072","https://openalex.org/W3085043532","https://openalex.org/W3091588028","https://openalex.org/W3106768499","https://openalex.org/W3132567969","https://openalex.org/W3139732141","https://openalex.org/W3181159501","https://openalex.org/W3199693760","https://openalex.org/W4287113019","https://openalex.org/W4292779060","https://openalex.org/W4294149591","https://openalex.org/W4385245566","https://openalex.org/W6631190155","https://openalex.org/W6637551013","https://openalex.org/W6638523607","https://openalex.org/W6728610325","https://openalex.org/W6730179637","https://openalex.org/W6739901393","https://openalex.org/W6747225742","https://openalex.org/W6753596515","https://openalex.org/W6754469826","https://openalex.org/W6769906912","https://openalex.org/W6775970589","https://openalex.org/W6778883912","https://openalex.org/W6782269215","https://openalex.org/W6797716411","https://openalex.org/W6801155572","https://openalex.org/W6840157293","https://openalex.org/W6898505805"],"related_works":["https://openalex.org/W73545470","https://openalex.org/W4224266612","https://openalex.org/W2383394264","https://openalex.org/W4320153225","https://openalex.org/W4293261942","https://openalex.org/W3125968744","https://openalex.org/W2167701463","https://openalex.org/W2110287964","https://openalex.org/W4307407935","https://openalex.org/W649759291"],"abstract_inverted_index":{"In":[0],"this":[1,73],"study,":[2],"we":[3,75,121],"aim":[4,31],"to":[5,60,82,89],"predict":[6],"the":[7,16,20,61,68,84,91,104,109,112,118,142,152,158,162,179],"plausible":[8],"future":[9,51],"action":[10,33,52],"steps":[11],"given":[12],"an":[13],"observation":[14],"of":[15,22,50,63,161,170],"past":[17],"and":[18,47,114,136],"study":[19],"task":[21,58],"instructional":[23,69],"activity":[24],"anticipation.":[25],"Unlike":[26],"previous":[27,96],"anticipation":[28,93,159],"tasks":[29],"that":[30,44],"at":[32,39],"label":[34],"prediction,":[35],"our":[36,149],"work":[37],"targets":[38],"generating":[40],"natural":[41],"language":[42],"outputs":[43],"provide":[45],"interpretable":[46],"accurate":[48],"descriptions":[49],"steps.":[53],"It":[54],"is":[55],"a":[56,77,123,167,183],"challenging":[57],"due":[59],"lack":[62],"semantic":[64],"information":[65,102],"extracted":[66],"from":[67],"videos.":[70],"To":[71,107],"overcome":[72],"challenge,":[74],"propose":[76],"novel":[78,124],"knowledge":[79,88,97,132],"distillation":[80,98,119,127,133,145],"framework":[81],"exploit":[83],"related":[85],"external":[86],"textual":[87],"assist":[90],"visual":[92,113],"task.":[94],"However,":[95],"techniques":[99],"generally":[100],"transfer":[101],"within":[103],"same":[105],"modality.":[106],"bridge":[108],"gap":[110],"between":[111,134],"text":[115],"modalities":[116,140],"during":[117],"process,":[120],"devise":[122],"cross-modal":[125,144],"contrastive":[126],"(CCD)":[128],"scheme,":[129],"which":[130],"facilitates":[131],"teacher":[135],"student":[137,164],"in":[138,173],"heterogeneous":[139],"with":[141],"proposed":[143],"loss.":[146],"We":[147],"evaluate":[148],"method":[150],"on":[151],"Tasty":[153],"Videos":[154],"dataset.":[155],"CCD":[156],"improves":[157],"performance":[160],"visual-alone":[163],"model":[165],"by":[166,182],"large":[168,184],"margin":[169],"40.2%":[171],"relatively":[172],"BLEU4.":[174],"Our":[175],"approach":[176],"also":[177],"outperforms":[178],"state-of-the-art":[180],"approaches":[181],"margin.":[185]},"counts_by_year":[{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
