{"id":"https://openalex.org/W4313017417","doi":"https://doi.org/10.1145/3556223.3556226","title":"Multiple-Perspective Caption Generation with Initial Attention Weights","display_name":"Multiple-Perspective Caption Generation with Initial Attention Weights","publication_year":2022,"publication_date":"2022-07-29","ids":{"openalex":"https://openalex.org/W4313017417","doi":"https://doi.org/10.1145/3556223.3556226"},"language":"en","primary_location":{"id":"doi:10.1145/3556223.3556226","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3556223.3556226","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The 10th International Conference on Computer and Communications Management","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5033636269","display_name":"Hidekazu Yanagimoto","orcid":"https://orcid.org/0000-0001-5766-5829"},"institutions":[{"id":"https://openalex.org/I4387152983","display_name":"Osaka Metropolitan University","ror":"https://ror.org/01hvx5h04","country_code":"JP","type":"education","lineage":["https://openalex.org/I4387152983"]},{"id":"https://openalex.org/I69740276","display_name":"Tokyo Metropolitan University","ror":"https://ror.org/00ws30h19","country_code":"JP","type":"education","lineage":["https://openalex.org/I69740276"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Hidekazu Yanagimoto","raw_affiliation_strings":["Graduate School of Informatics, Osaka Metropolitan University, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Graduate School of Informatics, Osaka Metropolitan University, Japan","institution_ids":["https://openalex.org/I69740276","https://openalex.org/I4387152983"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100992682","display_name":"Tsubasa Imai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tsubasa Imai","raw_affiliation_strings":["DXC Technology Japan Ltd., Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"DXC Technology Japan Ltd., Japan","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.3046,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.56237811,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"19","last_page":"23"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9914000034332275,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/perspective","display_name":"Perspective (graphical)","score":0.8190550208091736},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8178584575653076},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.748315691947937},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6713525056838989},{"id":"https://openalex.org/keywords/subject","display_name":"Subject (documents)","score":0.5430670380592346},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.52585369348526},{"id":"https://openalex.org/keywords/object-detection","display_name":"Object detection","score":0.521876871585846},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.5212391018867493},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.41123372316360474},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.41067826747894287},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.3612404465675354},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.2811840772628784},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.06685465574264526}],"concepts":[{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.8190550208091736},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8178584575653076},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.748315691947937},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6713525056838989},{"id":"https://openalex.org/C2777855551","wikidata":"https://www.wikidata.org/wiki/Q12310021","display_name":"Subject (documents)","level":2,"score":0.5430670380592346},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.52585369348526},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.521876871585846},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.5212391018867493},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.41123372316360474},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.41067826747894287},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3612404465675354},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2811840772628784},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.06685465574264526},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3556223.3556226","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3556223.3556226","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The 10th International Conference on Computer and Communications Management","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":7,"referenced_works":["https://openalex.org/W1861492603","https://openalex.org/W1902237438","https://openalex.org/W2163922914","https://openalex.org/W2619383789","https://openalex.org/W3163906087","https://openalex.org/W4385245566","https://openalex.org/W6739901393"],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W3205449125","https://openalex.org/W2358198472","https://openalex.org/W2492979768","https://openalex.org/W2188183077","https://openalex.org/W2732813147","https://openalex.org/W2143460112","https://openalex.org/W2042906257","https://openalex.org/W4292830139","https://openalex.org/W4319309705"],"abstract_inverted_index":{"Caption":[0],"generation":[1,22,101,169],"is":[2],"one":[3],"of":[4,15,46,62,93,160,178,201],"multimodal":[5],"learning":[6,10],"tasks":[7],"and":[8,26,133,145,174,198,225],"deep":[9],"contributes":[11],"to":[12,83,108,118,125,232],"the":[13,16,35,38,47,63,66,74,84,90,94,104,112,120,127,147,154,158,161,166,176,182,190,199,202,213,221,227,242],"improvement":[14],"caption":[17,21,72,100,134,168,180],"generation.":[18,135],"However,":[19],"usual":[20],"systems":[23],"receive":[24],"images":[25],"generate":[27,54,119,233],"their":[28],"captions":[29,39,56,110,151,235],"automatically.":[30],"So,":[31],"it":[32],"depends":[33],"on":[34,43,58,241],"system":[36,67,102,170,215,229],"whether":[37],"are":[40,49],"generated":[41,189],"based":[42,57,240],"where":[44],"parts":[45],"image":[48,64],"focused.":[50],"Human":[51],"captioners":[52],"can":[53],"multiple":[55],"different":[59,237],"focusing":[60,91,139],"points":[61,92,140],"but":[65],"generates":[68],"only":[69],"a":[70,98,179],"single":[71],"from":[73,141,194],"image.":[75,95],"To":[76],"overcome":[77],"this":[78],"problem,":[79],"we":[80,116,137,164,188,211],"pay":[81],"attention":[82,86,106,113,122,172,184,192,204,223,244],"initial":[85,105,121,183,191,203,222,243],"weights,":[87],"which":[88],"denote":[89],"We":[96],"propose":[97],"multiple-perspective":[99,167,234],"with":[103,150,157,171,207,216,236],"weights":[107,123,193,224],"control":[109,175],"via":[111],"mechanism.":[114],"Moreover,":[115,210],"try":[117],"automatically":[124],"use":[126],"general":[128],"corpus":[129],"for":[130],"object":[131,143,149,155],"detection":[132],"First,":[136],"determine":[138],"detected":[142,148],"positions":[144],"connect":[146],"by":[152],"comparing":[153],"tag":[156],"subject":[159,177,238],"caption.":[162],"Second,":[163],"develop":[165],"mechanism":[173],"using":[181],"weights.":[185,245],"In":[186],"experiments,":[187],"MS":[195,217],"COCO":[196,218],"dataset":[197,219],"accuracy":[200],"achieved":[205],"94.3%":[206],"human":[208],"judgment.":[209],"trained":[212],"proposed":[214,228],"including":[220],"confirmed":[226],"was":[230],"able":[231],"words":[239]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
