{"id":"https://openalex.org/W2895249420","doi":"https://doi.org/10.1145/3242969.3243026","title":"Multimodal Representation of Advertisements Using Segment-level Autoencoders","display_name":"Multimodal Representation of Advertisements Using Segment-level Autoencoders","publication_year":2018,"publication_date":"2018-10-02","ids":{"openalex":"https://openalex.org/W2895249420","doi":"https://doi.org/10.1145/3242969.3243026","mag":"2895249420"},"language":"en","primary_location":{"id":"doi:10.1145/3242969.3243026","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3242969.3243026","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 20th ACM International Conference on Multimodal Interaction","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5008305849","display_name":"Krishna Somandepalli","orcid":"https://orcid.org/0000-0002-2845-1079"},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Krishna Somandepalli","raw_affiliation_strings":["University of Southern California, Los Angeles, CA, USA"],"affiliations":[{"raw_affiliation_string":"University of Southern California, Los Angeles, CA, USA","institution_ids":["https://openalex.org/I1174212"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101642395","display_name":"Victor R. Mart\u00ednez","orcid":"https://orcid.org/0000-0003-1360-4711"},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Victor Martinez","raw_affiliation_strings":["University of Southern California, Los Angeles, CA, USA"],"affiliations":[{"raw_affiliation_string":"University of Southern California, Los Angeles, CA, USA","institution_ids":["https://openalex.org/I1174212"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079573104","display_name":"Naveen Kumar","orcid":"https://orcid.org/0000-0003-4745-4205"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Naveen Kumar","raw_affiliation_strings":["Sony Interactive Entertainment America, Los Angeles, CA, USA"],"affiliations":[{"raw_affiliation_string":"Sony Interactive Entertainment America, Los Angeles, CA, USA","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5103555640","display_name":"Shrikanth Narayanan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shrikanth Narayanan","raw_affiliation_strings":["Sony Interactive Entertainment America, Los Angeles, CA, USA"],"affiliations":[{"raw_affiliation_string":"Sony Interactive Entertainment America, Los Angeles, CA, USA","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5008305849"],"corresponding_institution_ids":["https://openalex.org/I1174212"],"apc_list":null,"apc_paid":null,"fwci":0.8257,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.73811576,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"418","last_page":"422"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11795","display_name":"Humor Studies and Applications","score":0.9930999875068665,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7712745070457458},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.6689115166664124},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6225305795669556},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.5941217541694641},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5843128561973572},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5223066210746765},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.4730873703956604},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4576493799686432},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.35668647289276123},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3304404020309448}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7712745070457458},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.6689115166664124},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6225305795669556},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.5941217541694641},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5843128561973572},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5223066210746765},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.4730873703956604},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4576493799686432},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.35668647289276123},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3304404020309448},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3242969.3243026","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3242969.3243026","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 20th ACM International Conference on Multimodal Interaction","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321739","display_name":"Consejo Nacional de Ciencia y Tecnolog\u00eda","ror":"https://ror.org/059ex5q34"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W24089286","https://openalex.org/W109925008","https://openalex.org/W1499019550","https://openalex.org/W1522301498","https://openalex.org/W1522734439","https://openalex.org/W1686810756","https://openalex.org/W1984170705","https://openalex.org/W1995609428","https://openalex.org/W2016053056","https://openalex.org/W2032296614","https://openalex.org/W2081345780","https://openalex.org/W2087835472","https://openalex.org/W2119288237","https://openalex.org/W2130162821","https://openalex.org/W2133564696","https://openalex.org/W2133913337","https://openalex.org/W2144345993","https://openalex.org/W2184188583","https://openalex.org/W2260521078","https://openalex.org/W2295107390","https://openalex.org/W2530144925","https://openalex.org/W2593116425","https://openalex.org/W2593390416","https://openalex.org/W2594690981","https://openalex.org/W2619383789","https://openalex.org/W2782996367","https://openalex.org/W2949888546","https://openalex.org/W2963037330","https://openalex.org/W3020986461","https://openalex.org/W3105782478","https://openalex.org/W4299828299"],"related_works":["https://openalex.org/W2185469136","https://openalex.org/W4306353150","https://openalex.org/W8219677","https://openalex.org/W3216879894","https://openalex.org/W2168054807","https://openalex.org/W2058990474","https://openalex.org/W2026860389","https://openalex.org/W2043363698","https://openalex.org/W4301143707","https://openalex.org/W2952745240"],"abstract_inverted_index":{"Automatic":[0],"analysis":[1],"of":[2,16,21,56,97,110,126,130,176,185],"advertisements":[3],"(ads)":[4],"poses":[5],"an":[6,57,99,183],"interesting":[7],"problem":[8],"for":[9],"learning":[10],"multimodal":[11,63,91,140,162],"representations.":[12,31,45],"A":[13],"promising":[14],"direction":[15],"research":[17],"is":[18],"the":[19,54,76,90,95,118,131,139,148,160,174,177],"development":[20],"deep":[22],"neural":[23],"network":[24],"autoencoders":[25,66,120,168],"to":[26,39,59,88,155,192],"obtain":[27,40,60,182],"inter-modal":[28],"and":[29,43,50,78,83,144,158,169],"intra-modal":[30],"In":[32,86],"this":[33,114],"work,":[34],"we":[35,93,116,151],"propose":[36],"a":[37,61,106,122,193],"system":[38],"segment-level":[41,119,161],"unimodal":[42,145],"joint":[44,143],"These":[46],"features":[47],"are":[48,67,153],"concatenated,":[49],"then":[51],"averaged":[52],"across":[53],"duration":[55],"ad":[58,100],"single":[62],"representation.":[64],"The":[65],"trained":[68],"using":[69],"segments":[70],"generated":[71],"by":[72],"time-aligning":[73],"frames":[74],"between":[75],"audio":[77],"video":[79],"modalities":[80],"with":[81],"forward":[82],"backward":[84],"context.":[85],"order":[87],"assess":[89],"representations,":[92,146],"consider":[94],"tasks":[96],"classifying":[98],"as":[101],"funny":[102],"or":[103],"exciting":[104],"in":[105,188],"publicly":[107],"available":[108],"dataset":[109,125],"2,720":[111],"ads.":[112],"For":[113],"purpose":[115],"train":[117],"on":[121],"larger,":[123],"unlabeled":[124],"9,740":[127],"ads,":[128],"agnostic":[129],"test":[132],"set.":[133],"Our":[134],"experiments":[135],"show":[136],"that:":[137],"1)":[138],"representations":[141,150,163,171],"outperform":[142],"2)":[147],"different":[149],"learn":[152],"complementary":[154],"each":[156],"other,":[157],"3)":[159],"perform":[164],"better":[165],"than":[166],"classical":[167],"cross-modal":[170],"--":[172],"within":[173],"context":[175],"two":[178],"classification":[179,189],"tasks.":[180],"We":[181],"improvement":[184],"about":[186],"5%":[187],"accuracy":[190],"compared":[191],"competitive":[194],"baseline.":[195]},"counts_by_year":[{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
