{"id":"https://openalex.org/W4402981217","doi":"https://doi.org/10.1109/icme57554.2024.10687709","title":"Manga109Dialog: A Large-Scale Dialogue Dataset for Comics Speaker Detection","display_name":"Manga109Dialog: A Large-Scale Dialogue Dataset for Comics Speaker Detection","publication_year":2024,"publication_date":"2024-07-15","ids":{"openalex":"https://openalex.org/W4402981217","doi":"https://doi.org/10.1109/icme57554.2024.10687709"},"language":"en","primary_location":{"id":"doi:10.1109/icme57554.2024.10687709","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme57554.2024.10687709","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5001638438","display_name":"Yingxuan Li","orcid":"https://orcid.org/0000-0002-9139-4177"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Yingxuan Li","raw_affiliation_strings":["The University of Tokyo"],"affiliations":[{"raw_affiliation_string":"The University of Tokyo","institution_ids":["https://openalex.org/I74801974"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069982192","display_name":"Kiyoharu Aizawa","orcid":"https://orcid.org/0000-0003-2146-6275"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Kiyoharu Aizawa","raw_affiliation_strings":["The University of Tokyo"],"affiliations":[{"raw_affiliation_string":"The University of Tokyo","institution_ids":["https://openalex.org/I74801974"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5103144232","display_name":"Yusuke Matsui","orcid":"https://orcid.org/0000-0003-3977-4313"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yusuke Matsui","raw_affiliation_strings":["The University of Tokyo"],"affiliations":[{"raw_affiliation_string":"The University of Tokyo","institution_ids":["https://openalex.org/I74801974"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5001638438"],"corresponding_institution_ids":["https://openalex.org/I74801974"],"apc_list":null,"apc_paid":null,"fwci":34.2961,"has_fulltext":false,"cited_by_count":12,"citation_normalized_percentile":{"value":0.99689343,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10759","display_name":"Translation Studies and Practices","score":0.9839000105857849,"subfield":{"id":"https://openalex.org/subfields/1203","display_name":"Language and Linguistics"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10759","display_name":"Translation Studies and Practices","score":0.9839000105857849,"subfield":{"id":"https://openalex.org/subfields/1203","display_name":"Language and Linguistics"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9678000211715698,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12151","display_name":"Interpreting and Communication in Healthcare","score":0.9605000019073486,"subfield":{"id":"https://openalex.org/subfields/3600","display_name":"General Health Professions"},"field":{"id":"https://openalex.org/fields/36","display_name":"Health Professions"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/comics","display_name":"Comics","score":0.7901309728622437},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7794495820999146},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.6116268634796143},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5353017449378967},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.49782395362854004},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.43137574195861816},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.04987403750419617},{"id":"https://openalex.org/keywords/cartography","display_name":"Cartography","score":0.04849359393119812}],"concepts":[{"id":"https://openalex.org/C529099274","wikidata":"https://www.wikidata.org/wiki/Q1004","display_name":"Comics","level":2,"score":0.7901309728622437},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7794495820999146},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.6116268634796143},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5353017449378967},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.49782395362854004},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.43137574195861816},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.04987403750419617},{"id":"https://openalex.org/C58640448","wikidata":"https://www.wikidata.org/wiki/Q42515","display_name":"Cartography","level":1,"score":0.04849359393119812}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icme57554.2024.10687709","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme57554.2024.10687709","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W2030973401","https://openalex.org/W2064675550","https://openalex.org/W2077069816","https://openalex.org/W2115886797","https://openalex.org/W2277195237","https://openalex.org/W2294069344","https://openalex.org/W2479423890","https://openalex.org/W2549139847","https://openalex.org/W2565639579","https://openalex.org/W2890531016","https://openalex.org/W2963536419","https://openalex.org/W2963938081","https://openalex.org/W3017083351","https://openalex.org/W3035017890"],"related_works":["https://openalex.org/W3201315974","https://openalex.org/W3015688758","https://openalex.org/W3171682447","https://openalex.org/W3216780987","https://openalex.org/W2228920720","https://openalex.org/W610194060","https://openalex.org/W608774069","https://openalex.org/W2921140335","https://openalex.org/W4394253968","https://openalex.org/W3204019825"],"abstract_inverted_index":{"The":[0],"expanding":[1],"market":[2],"for":[3,12,27,54,112],"e-comics":[4],"has":[5],"driven":[6],"the":[7,17,48,73,80,84],"development":[8],"of":[9,20,76,104],"automated":[10,23],"methods":[11],"analyzing":[13],"comics.":[14,116],"To":[15,71],"enhance":[16],"machine\u2019s":[18],"understanding":[19],"comics,":[21,55,77],"an":[22],"method":[24,65],"is":[25,47],"essential":[26],"linking":[28],"text":[29],"in":[30,115],"comics":[31],"to":[32],"characters":[33],"that":[34,93],"speak":[35],"those":[36],"words.":[37],"In":[38],"this":[39],"study,":[40],"we":[41,78],"developed":[42],"Manga109Dialog<sup":[43],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[44],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">1</sup>,":[45],"which":[46],"world\u2019s":[49],"largest":[50],"speaker-to-text":[51],"annotation":[52],"dataset":[53],"containing":[56],"132,692":[57],"pairs.":[58],"We":[59],"proposed":[60],"a":[61,101,109],"novel":[62],"deep":[63],"learning-based":[64],"using":[66],"scene":[67],"graph":[68],"generation":[69],"models.":[70],"tailor":[72],"unique":[74],"features":[75],"enhanced":[79],"performance":[81],"by":[82],"considering":[83],"frame":[85],"reading":[86],"order.":[87],"Our":[88],"experiments":[89],"with":[90],"Manga109Dialog":[91],"show":[92],"our":[94],"scene-graph-based":[95],"approach":[96],"outperforms":[97],"existing":[98],"methods,":[99],"achieving":[100],"prediction":[102],"accuracy":[103],"over":[105],"75%,":[106],"thus":[107],"establishing":[108],"robust":[110],"benchmark":[111],"speaker":[113],"detection":[114]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":8}],"updated_date":"2025-12-27T23:08:20.325037","created_date":"2025-10-10T00:00:00"}
