{"id":"https://openalex.org/W3205822151","doi":"https://doi.org/10.1145/3474085.3475545","title":"Mask and Predict","display_name":"Mask and Predict","publication_year":2021,"publication_date":"2021-10-17","ids":{"openalex":"https://openalex.org/W3205822151","doi":"https://doi.org/10.1145/3474085.3475545","mag":"3205822151"},"language":"en","primary_location":{"id":"doi:10.1145/3474085.3475545","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3474085.3475545","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5019994406","display_name":"Hongshuo Tian","orcid":"https://orcid.org/0000-0001-7635-0961"},"institutions":[{"id":"https://openalex.org/I4210114541","display_name":"Tianjin People's Hospital","ror":"https://ror.org/01x62kg38","country_code":"CN","type":"healthcare","lineage":["https://openalex.org/I4210114541"]},{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Hongshuo Tian","raw_affiliation_strings":["Tianjin University &amp; People's Daily Online, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"Tianjin University &amp; People's Daily Online, Tianjin, China","institution_ids":["https://openalex.org/I4210114541","https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054900679","display_name":"Ning Xu","orcid":"https://orcid.org/0000-0002-7526-4356"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ning Xu","raw_affiliation_strings":["Tianjin University, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"Tianjin University, Tianjin, China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081485810","display_name":"An-An Liu","orcid":"https://orcid.org/0000-0001-5755-9145"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"An-An Liu","raw_affiliation_strings":["Tianjin University, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"Tianjin University, Tianjin, China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054311881","display_name":"Chenggang Yan","orcid":"https://orcid.org/0000-0003-1204-0512"},"institutions":[{"id":"https://openalex.org/I50760025","display_name":"Hangzhou Dianzi University","ror":"https://ror.org/0576gt767","country_code":"CN","type":"education","lineage":["https://openalex.org/I50760025"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chenggang Yan","raw_affiliation_strings":["Hangzhou Dianzi University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Hangzhou Dianzi University, Hangzhou, China","institution_ids":["https://openalex.org/I50760025"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023341829","display_name":"Zhendong Mao","orcid":"https://orcid.org/0000-0001-5739-8126"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]},{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhendong Mao","raw_affiliation_strings":["University of Science and Technology of China, Beijing, China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China, Beijing, China","institution_ids":["https://openalex.org/I92403157","https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100406673","display_name":"Quan Zhang","orcid":"https://orcid.org/0000-0002-0489-0456"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Quan Zhang","raw_affiliation_strings":["Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5046305086","display_name":"Yongdong Zhang","orcid":"https://orcid.org/0000-0002-1151-1792"},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]},{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yongdong Zhang","raw_affiliation_strings":["University of Science and Technology of China, Beijing, China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China, Beijing, China","institution_ids":["https://openalex.org/I92403157","https://openalex.org/I126520041"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5019994406"],"corresponding_institution_ids":["https://openalex.org/I162868743","https://openalex.org/I4210114541"],"apc_list":null,"apc_paid":null,"fwci":0.9607,"has_fulltext":false,"cited_by_count":11,"citation_normalized_percentile":{"value":0.77702614,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"4128","last_page":"4136"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7809422016143799},{"id":"https://openalex.org/keywords/parsing","display_name":"Parsing","score":0.5997892022132874},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5903950333595276},{"id":"https://openalex.org/keywords/scene-graph","display_name":"Scene graph","score":0.5622166395187378},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.5570870041847229},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.5398598909378052},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.5011909008026123},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.4901040494441986},{"id":"https://openalex.org/keywords/visual-reasoning","display_name":"Visual reasoning","score":0.4384230971336365},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.38912978768348694},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3564547598361969},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.33225518465042114},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.25915825366973877},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.08643350005149841}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7809422016143799},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.5997892022132874},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5903950333595276},{"id":"https://openalex.org/C179372163","wikidata":"https://www.wikidata.org/wiki/Q1406181","display_name":"Scene graph","level":3,"score":0.5622166395187378},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.5570870041847229},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.5398598909378052},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.5011909008026123},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.4901040494441986},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.4384230971336365},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.38912978768348694},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3564547598361969},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.33225518465042114},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.25915825366973877},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.08643350005149841},{"id":"https://openalex.org/C205711294","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Rendering (computer graphics)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3474085.3475545","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3474085.3475545","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3974818871","display_name":null,"funder_award_id":"61772359, 62002257","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8402877594","display_name":null,"funder_award_id":"2020YFB1406602","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":42,"referenced_works":["https://openalex.org/W2076421233","https://openalex.org/W2077069816","https://openalex.org/W2277195237","https://openalex.org/W2479423890","https://openalex.org/W2521709538","https://openalex.org/W2546696630","https://openalex.org/W2591644541","https://openalex.org/W2606609115","https://openalex.org/W2607855566","https://openalex.org/W2620666060","https://openalex.org/W2788537604","https://openalex.org/W2806070179","https://openalex.org/W2886970679","https://openalex.org/W2887029921","https://openalex.org/W2950096400","https://openalex.org/W2951343884","https://openalex.org/W2953106684","https://openalex.org/W2962785943","https://openalex.org/W2962835968","https://openalex.org/W2963192850","https://openalex.org/W2963314968","https://openalex.org/W2963434219","https://openalex.org/W2963649796","https://openalex.org/W2963769536","https://openalex.org/W2963902384","https://openalex.org/W2963938081","https://openalex.org/W2964094751","https://openalex.org/W2964207259","https://openalex.org/W2987123286","https://openalex.org/W2988975212","https://openalex.org/W3008014442","https://openalex.org/W3010277541","https://openalex.org/W3034538190","https://openalex.org/W3034984754","https://openalex.org/W3035017890","https://openalex.org/W3081642947","https://openalex.org/W3093044239","https://openalex.org/W3096612875","https://openalex.org/W3106328333","https://openalex.org/W3108864070","https://openalex.org/W3173415456","https://openalex.org/W4285620459"],"related_works":["https://openalex.org/W2754155766","https://openalex.org/W4287854977","https://openalex.org/W2963192850","https://openalex.org/W2769151336","https://openalex.org/W4389782456","https://openalex.org/W2903371384","https://openalex.org/W3084841567","https://openalex.org/W4302773889","https://openalex.org/W4289753062","https://openalex.org/W3008700642"],"abstract_inverted_index":{"Scene":[0],"Graph":[1],"Generation":[2],"(SGG)":[3],"aims":[4],"to":[5,89,131,149,171,202],"parse":[6],"the":[7,20,27,31,35,72,86,122,127,142,151,182,219,222],"image":[8],"as":[9,34,49,65],"a":[10,66,80,95,209],"set":[11],"of":[12,59,74,158,162,165,196,221],"semantics,":[13],"containing":[14],"objects":[15,133],"and":[16,83,116,134,168,174,180,189,228],"their":[17,175],"relations.":[18],"Currently,":[19],"SGG":[21,64,105,129],"methods":[22,61],"only":[23,69],"stay":[24],"at":[25],"presenting":[26],"intuitive":[28,47,112,138,152,197],"detection":[29],"in":[30],"image,":[32],"such":[33],"triplet":[36],"\"logo":[37],"on":[38,55,79,215],"board\".":[39],"Intuitively,":[40],"we":[41,93,103,125,145,177],"humans":[42],"can":[43],"further":[44,231],"refine":[45,150],"these":[46],"detections":[48],"rational":[50,117,210],"descriptions":[51],"like":[52],"\"flower":[53],"painted":[54],"surfboard\".":[56],"However,":[57],"most":[58],"existing":[60],"always":[62],"formulate":[63],"straightforward":[67],"task,":[68],"limited":[70],"by":[71],"manner":[73,99],"one-time":[75],"prediction,":[76],"which":[77,185],"focuses":[78],"single-pass":[81],"pipeline":[82],"predicts":[84],"all":[85,195],"semantic.":[87],"Therefore,":[88],"handle":[90],"this":[91],"problem,":[92],"propose":[94],"novel":[96],"multi-step":[97,147],"reasoning":[98,148],"for":[100],"SGG.":[101],"Concretely,":[102],"break":[104],"into":[106],"two":[107,163],"explicit":[108],"learning":[109],"stages,":[110],"including":[111],"training":[113,118],"stage":[114,119],"(ITS)":[115],"(RTS).":[120],"In":[121,141],"first":[123],"stage,":[124,144],"follow":[126],"traditional":[128],"processing":[130],"detect":[132],"relationships,":[135],"yielding":[136,208],"an":[137],"scene":[139,153,211],"graph.":[140,154,212],"second":[143],"perform":[146],"For":[155],"each":[156],"step":[157],"reasoning,":[159],"it":[160],"consists":[161],"kinds":[164],"operations:":[166],"mask":[167,181],"predict.":[169],"According":[170],"primary":[172],"predictions":[173],"confidences,":[176,207],"constantly":[178],"select":[179],"low-confidence":[183],"predictions,":[184],"features":[186],"are":[187],"optimized":[188],"predicted":[190],"again.":[191],"After":[192],"several":[193],"iterations,":[194],"semantics":[198],"will":[199],"gradually":[200],"tend":[201],"be":[203],"revised":[204],"with":[205],"high":[206],"Extensive":[213],"experiments":[214],"Visual":[216],"Genome":[217],"prove":[218],"superiority":[220],"proposed":[223],"method.":[224],"Additional":[225],"ablation":[226],"studies":[227],"visualization":[229],"cases":[230],"validate":[232],"its":[233],"effectiveness.":[234]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2021-10-25T00:00:00"}
