{"id":"https://openalex.org/W4304080713","doi":"https://doi.org/10.1145/3503161.3548086","title":"PPMN: Pixel-Phrase Matching Network for One-Stage Panoptic Narrative Grounding","display_name":"PPMN: Pixel-Phrase Matching Network for One-Stage Panoptic Narrative Grounding","publication_year":2022,"publication_date":"2022-10-10","ids":{"openalex":"https://openalex.org/W4304080713","doi":"https://doi.org/10.1145/3503161.3548086"},"language":"en","primary_location":{"id":"doi:10.1145/3503161.3548086","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3503161.3548086","pdf_url":null,"source":{"id":"https://openalex.org/S4363608757","display_name":"Proceedings of the 30th ACM International Conference on Multimedia","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5002742139","display_name":"Zihan Ding","orcid":"https://orcid.org/0009-0005-2008-4816"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zihan Ding","raw_affiliation_strings":["Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002742139","display_name":"Zihan Ding","orcid":"https://orcid.org/0009-0005-2008-4816"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zi-han Ding","raw_affiliation_strings":["Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056811650","display_name":"Tianrui Hui","orcid":"https://orcid.org/0000-0002-1172-1554"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tianrui Hui","raw_affiliation_strings":["Chinese Academy of Sciences &amp; University of Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Chinese Academy of Sciences &amp; University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013673544","display_name":"Junshi Huang","orcid":"https://orcid.org/0000-0002-8395-1463"},"institutions":[{"id":"https://openalex.org/I4210087373","display_name":"Meizu (China)","ror":"https://ror.org/0067g4302","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210087373"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Junshi Huang","raw_affiliation_strings":["Meituan, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Meituan, Beijing, China","institution_ids":["https://openalex.org/I4210087373"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069204507","display_name":"Xiaoming Wei","orcid":null},"institutions":[{"id":"https://openalex.org/I4210087373","display_name":"Meizu (China)","ror":"https://ror.org/0067g4302","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210087373"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaoming Wei","raw_affiliation_strings":["Meituan, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Meituan, Beijing, China","institution_ids":["https://openalex.org/I4210087373"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009222019","display_name":"Xiaolin Wei","orcid":"https://orcid.org/0000-0002-3983-047X"},"institutions":[{"id":"https://openalex.org/I4210087373","display_name":"Meizu (China)","ror":"https://ror.org/0067g4302","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210087373"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaolin Wei","raw_affiliation_strings":["Meituan, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Meituan, Beijing, China","institution_ids":["https://openalex.org/I4210087373"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100330138","display_name":"Si Liu","orcid":"https://orcid.org/0000-0002-9180-2935"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Si Liu","raw_affiliation_strings":["Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5002742139"],"corresponding_institution_ids":["https://openalex.org/I82880672"],"apc_list":null,"apc_paid":null,"fwci":0.5997,"has_fulltext":false,"cited_by_count":11,"citation_normalized_percentile":{"value":0.76821617,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"5537","last_page":"5546"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9945999979972839,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7632799744606018},{"id":"https://openalex.org/keywords/phrase","display_name":"Phrase","score":0.6651788353919983},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6340181231498718},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.6139083504676819},{"id":"https://openalex.org/keywords/pixel","display_name":"Pixel","score":0.5815862417221069},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5443331003189087},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4994680881500244},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.46357136964797974},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.44315558671951294},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.4231402575969696},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.38477396965026855},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.3263995349407196},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.09170746803283691}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7632799744606018},{"id":"https://openalex.org/C2776224158","wikidata":"https://www.wikidata.org/wiki/Q187931","display_name":"Phrase","level":2,"score":0.6651788353919983},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6340181231498718},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.6139083504676819},{"id":"https://openalex.org/C160633673","wikidata":"https://www.wikidata.org/wiki/Q355198","display_name":"Pixel","level":2,"score":0.5815862417221069},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5443331003189087},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4994680881500244},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.46357136964797974},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.44315558671951294},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.4231402575969696},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.38477396965026855},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3263995349407196},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.09170746803283691},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3503161.3548086","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3503161.3548086","pdf_url":null,"source":{"id":"https://openalex.org/S4363608757","display_name":"Proceedings of the 30th ACM International Conference on Multimedia","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10","score":0.7099999785423279}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":55,"referenced_works":["https://openalex.org/W1861492603","https://openalex.org/W1903029394","https://openalex.org/W1933349210","https://openalex.org/W1963826206","https://openalex.org/W2194775991","https://openalex.org/W2302548814","https://openalex.org/W2565639579","https://openalex.org/W2606473278","https://openalex.org/W2910628332","https://openalex.org/W2922509574","https://openalex.org/W2952524542","https://openalex.org/W2962764817","https://openalex.org/W2963037989","https://openalex.org/W2963080533","https://openalex.org/W2963150697","https://openalex.org/W2963717374","https://openalex.org/W2963783181","https://openalex.org/W2964276121","https://openalex.org/W2965182628","https://openalex.org/W2983358816","https://openalex.org/W2986755220","https://openalex.org/W2987734933","https://openalex.org/W2999219213","https://openalex.org/W3034325957","https://openalex.org/W3034355852","https://openalex.org/W3034772468","https://openalex.org/W3034975706","https://openalex.org/W3037533539","https://openalex.org/W3048835936","https://openalex.org/W3083600713","https://openalex.org/W3090449556","https://openalex.org/W3092961994","https://openalex.org/W3093017735","https://openalex.org/W3093025045","https://openalex.org/W3095670406","https://openalex.org/W3096609285","https://openalex.org/W3106546328","https://openalex.org/W3107094551","https://openalex.org/W3108748824","https://openalex.org/W3110435696","https://openalex.org/W3159619744","https://openalex.org/W3168649818","https://openalex.org/W3177271687","https://openalex.org/W3177892185","https://openalex.org/W3178075329","https://openalex.org/W3205869909","https://openalex.org/W3206209177","https://openalex.org/W3206369173","https://openalex.org/W3207127495","https://openalex.org/W4214490042","https://openalex.org/W4214530390","https://openalex.org/W4225348272","https://openalex.org/W4250482878","https://openalex.org/W4283029876","https://openalex.org/W4312690830"],"related_works":["https://openalex.org/W2378211422","https://openalex.org/W4321353415","https://openalex.org/W2745001401","https://openalex.org/W2039546652","https://openalex.org/W2130974462","https://openalex.org/W2028665553","https://openalex.org/W2086519370","https://openalex.org/W972276598","https://openalex.org/W4246352526","https://openalex.org/W2121910908"],"abstract_inverted_index":{"Panoptic":[0],"Narrative":[1],"Grounding":[2],"(PNG)":[3],"is":[4,10],"an":[5,39],"emerging":[6],"task":[7],"whose":[8],"goal":[9],"to":[11,49,116,166,187],"segment":[12],"visual":[13,192],"objects":[14],"of":[15,25,68,78,121,145,172],"things":[16,93],"and":[17,75,94,124,137],"stuff":[18,95],"categories":[19,96],"described":[20],"by":[21,38,82,128],"dense":[22],"narrative":[23],"captions":[24],"a":[26,104,160],"still":[27],"image.":[28],"The":[29],"previous":[30],"two-stage":[31,60],"approach":[32],"first":[33,73],"extracts":[34],"segmentation":[35,42,127],"region":[36,83,122],"proposals":[37,70,123],"off-the-shelf":[40],"panoptic":[41,126],"model,":[43],"then":[44],"conducts":[45],"coarse":[46],"region-phrase":[47,153],"matching":[48],"ground":[50],"the":[51,59,65,72,76,143,169,180,190,205],"candidate":[52],"regions":[53],"for":[54,92,184],"each":[55,114,185],"noun":[56],"phrase.":[57],"However,":[58],"pipeline":[61],"usually":[62],"suffers":[63],"from":[64,142],"performance":[66,203],"limitation":[67],"low-quality":[69],"in":[71],"stage":[74],"loss":[77],"spatial":[79],"details":[80],"caused":[81],"feature":[84],"pooling,":[85],"as":[86,88],"well":[87],"complicated":[89],"strategies":[90],"designed":[91],"separately.":[97],"To":[98],"alleviate":[99],"these":[100],"drawbacks,":[101],"we":[102,157],"propose":[103,159],"one-stage":[105],"end-to-end":[106],"Pixel-Phrase":[107],"Matching":[108],"Network":[109],"(PPMN),":[110],"which":[111,178],"directly":[112],"matches":[113],"phrase":[115,173,186],"its":[117],"corresponding":[118,191],"pixels":[119,183],"instead":[120],"outputs":[125],"simple":[129],"combination.":[130],"Thus,":[131],"our":[132,198],"model":[133],"can":[134],"exploit":[135],"sufficient":[136],"finer":[138],"cross-modal":[139],"semantic":[140],"correspondence":[141],"supervision":[144],"densely":[146],"annotated":[147],"pixel-phrase":[148],"pairs":[149],"rather":[150],"than":[151],"sparse":[152],"pairs.":[154],"In":[155],"addition,":[156],"also":[158],"Language-Compatible":[161],"Pixel":[162],"Aggregation":[163],"(LCPA)":[164],"module":[165],"further":[167],"enhance":[168],"discriminative":[170],"ability":[171],"features":[174],"through":[175],"multi-round":[176],"refinement,":[177],"selects":[179],"most":[181],"compatible":[182],"adaptively":[188],"aggregate":[189],"context.":[193],"Extensive":[194],"experiments":[195],"show":[196],"that":[197],"method":[199],"achieves":[200],"new":[201],"state-of-the-art":[202],"on":[204],"PNG":[206],"benchmark":[207],"with":[208],"4.0":[209],"absolute":[210],"Average":[211],"Recall":[212],"gains.":[213]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
