{"id":"https://openalex.org/W3207001124","doi":"https://doi.org/10.1145/3474085.3475322","title":"State-aware Video Procedural Captioning","display_name":"State-aware Video Procedural Captioning","publication_year":2021,"publication_date":"2021-10-17","ids":{"openalex":"https://openalex.org/W3207001124","doi":"https://doi.org/10.1145/3474085.3475322","mag":"3207001124"},"language":"en","primary_location":{"id":"doi:10.1145/3474085.3475322","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3474085.3475322","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5061593748","display_name":"Taichi Nishimura","orcid":"https://orcid.org/0000-0001-8725-7164"},"institutions":[{"id":"https://openalex.org/I22299242","display_name":"Kyoto University","ror":"https://ror.org/02kpeqv85","country_code":"JP","type":"education","lineage":["https://openalex.org/I22299242"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Taichi Nishimura","raw_affiliation_strings":["Kyoto University, Kyoto, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Kyoto University, Kyoto, Japan","institution_ids":["https://openalex.org/I22299242"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038408644","display_name":"Atsushi Hashimoto","orcid":"https://orcid.org/0000-0002-0799-4269"},"institutions":[{"id":"https://openalex.org/I146230289","display_name":"Omron (Japan)","ror":"https://ror.org/00q0w1h45","country_code":"JP","type":"company","lineage":["https://openalex.org/I146230289"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Atsushi Hashimoto","raw_affiliation_strings":["OMRON SINIC X Corporation, Tokyo, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"OMRON SINIC X Corporation, Tokyo, Japan","institution_ids":["https://openalex.org/I146230289"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077707500","display_name":"Yoshitaka Ushiku","orcid":"https://orcid.org/0000-0002-9014-1389"},"institutions":[{"id":"https://openalex.org/I146230289","display_name":"Omron (Japan)","ror":"https://ror.org/00q0w1h45","country_code":"JP","type":"company","lineage":["https://openalex.org/I146230289"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yoshitaka Ushiku","raw_affiliation_strings":["OMRON SINIC X Corporation, Tokyo, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"OMRON SINIC X Corporation, Tokyo, Japan","institution_ids":["https://openalex.org/I146230289"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000531397","display_name":"Hirotaka Kameko","orcid":"https://orcid.org/0000-0001-9844-6198"},"institutions":[{"id":"https://openalex.org/I22299242","display_name":"Kyoto University","ror":"https://ror.org/02kpeqv85","country_code":"JP","type":"education","lineage":["https://openalex.org/I22299242"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Hirotaka Kameko","raw_affiliation_strings":["Kyoto University, Kyoto, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Kyoto University, Kyoto, Japan","institution_ids":["https://openalex.org/I22299242"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5001224773","display_name":"Shinsuke Mori","orcid":"https://orcid.org/0000-0001-8596-8667"},"institutions":[{"id":"https://openalex.org/I22299242","display_name":"Kyoto University","ror":"https://ror.org/02kpeqv85","country_code":"JP","type":"education","lineage":["https://openalex.org/I22299242"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Shinsuke Mori","raw_affiliation_strings":["Kyoto University, Kyoto, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Kyoto University, Kyoto, Japan","institution_ids":["https://openalex.org/I22299242"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.1643,"has_fulltext":false,"cited_by_count":15,"citation_normalized_percentile":{"value":0.80971275,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1766","last_page":"1774"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9955999851226807,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.9687888622283936},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8440923690795898},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.7095962762832642},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.5297654271125793},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.5051468014717102},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.45207759737968445},{"id":"https://openalex.org/keywords/tracking","display_name":"Tracking (education)","score":0.4371933341026306},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.4312538802623749},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.41150805354118347},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.39155861735343933},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.3224920630455017},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.2105565071105957},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.20690518617630005}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.9687888622283936},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8440923690795898},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.7095962762832642},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.5297654271125793},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.5051468014717102},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.45207759737968445},{"id":"https://openalex.org/C2775936607","wikidata":"https://www.wikidata.org/wiki/Q466845","display_name":"Tracking (education)","level":2,"score":0.4371933341026306},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.4312538802623749},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.41150805354118347},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.39155861735343933},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.3224920630455017},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.2105565071105957},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.20690518617630005},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0},{"id":"https://openalex.org/C19417346","wikidata":"https://www.wikidata.org/wiki/Q7922","display_name":"Pedagogy","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3474085.3475322","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3474085.3475322","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7599999904632568,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":49,"referenced_works":["https://openalex.org/W639708223","https://openalex.org/W1947481528","https://openalex.org/W1956340063","https://openalex.org/W2101105183","https://openalex.org/W2107598941","https://openalex.org/W2108325777","https://openalex.org/W2123301721","https://openalex.org/W2173520492","https://openalex.org/W2187089797","https://openalex.org/W2194775991","https://openalex.org/W2250539671","https://openalex.org/W2250965435","https://openalex.org/W2252225847","https://openalex.org/W2252269235","https://openalex.org/W2519328139","https://openalex.org/W2526198870","https://openalex.org/W2594270457","https://openalex.org/W2606974598","https://openalex.org/W2737041163","https://openalex.org/W2784025607","https://openalex.org/W2883910824","https://openalex.org/W2885318751","https://openalex.org/W2949117887","https://openalex.org/W2950133940","https://openalex.org/W2951098185","https://openalex.org/W2962795934","https://openalex.org/W2963341956","https://openalex.org/W2963351113","https://openalex.org/W2963403868","https://openalex.org/W2963436881","https://openalex.org/W2963811641","https://openalex.org/W2963870701","https://openalex.org/W2963983586","https://openalex.org/W2964110616","https://openalex.org/W2964121744","https://openalex.org/W2964285770","https://openalex.org/W2968101724","https://openalex.org/W2981851019","https://openalex.org/W2984008963","https://openalex.org/W2986801687","https://openalex.org/W3035237998","https://openalex.org/W3035372819","https://openalex.org/W3035635319","https://openalex.org/W3081217025","https://openalex.org/W3090578762","https://openalex.org/W3092714372","https://openalex.org/W3096609285","https://openalex.org/W3098391431","https://openalex.org/W3111790582"],"related_works":["https://openalex.org/W4210416330","https://openalex.org/W2775506363","https://openalex.org/W3088136942","https://openalex.org/W2963177403","https://openalex.org/W4290852288","https://openalex.org/W2949362007","https://openalex.org/W4283207562","https://openalex.org/W2330246314","https://openalex.org/W2949522393","https://openalex.org/W4289422896"],"abstract_inverted_index":{"Video":[0],"procedural":[1,6,48],"captioning":[2,146,162],"(VPC),":[3],"which":[4,126,158],"generates":[5],"text":[7,49],"from":[8,50,187],"instructional":[9,56],"videos,":[10],"is":[11,26,68,91],"an":[12,55,128],"essential":[13,89],"task":[14],"for":[15,132],"scene":[16],"understanding":[17],"and":[18,58,140,183],"real-world":[19],"applications.":[20],"The":[21,88,181],"main":[22],"challenge":[23,39],"of":[24,54,66,154,170],"VPC":[25,44,124],"to":[27,30,92,111,172],"describe":[28],"how":[29],"manipulate":[31],"materials":[32,67,171],"accurately.":[33],"This":[34],"paper":[35],"focuses":[36],"on":[37],"this":[38,62],"by":[40,71],"designing":[41],"a":[42,47,102,122,137,144],"new":[43],"task,":[45,63],"generating":[46],"the":[51,64,106,114,152,155,167,175],"clip":[52],"sequence":[53],"video":[57,145,161],"material":[59,107,134],"list.":[60],"In":[61],"state":[65,179],"sequentially":[69],"changed":[70],"manipulations,":[72],"yielding":[73],"their":[74,178],"state-aware":[75],"visual":[76,95,138],"representations":[77,96],"(e.g.,":[78],"eggs":[79],"are":[80,185],"transformed":[81],"into":[82,97,143],"cracked,":[83],"stirred,":[84],"then":[85],"fried":[86],"forms).":[87],"difficulty":[90],"convert":[93],"such":[94],"textual":[98,130],"representations;":[99],"that":[100,174],"is,":[101],"model":[103],"should":[104],"track":[105],"states":[108,135],"after":[109],"manipulations":[110],"better":[112],"associate":[113],"cross-modal":[115],"relations.":[116],"To":[117],"achieve":[118],"this,":[119],"we":[120],"propose":[121],"novel":[123],"method,":[125,157],"modifies":[127],"existing":[129],"simulator":[131,139],"tracking":[133],"as":[136],"incorporates":[141],"it":[142],"model.":[147],"Our":[148],"experimental":[149],"results":[150],"show":[151],"effectiveness":[153],"proposed":[156],"outperforms":[159],"state-of-the-art":[160],"models.":[163],"We":[164],"further":[165],"analyze":[166],"learned":[168],"embedding":[169],"demonstrate":[173],"simulators":[176],"capture":[177],"transition.":[180],"code":[182],"dataset":[184],"available":[186],"https://github.com/misogil0116/svpc":[188]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":3}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
