{"id":"https://openalex.org/W2914773199","doi":"https://doi.org/10.1145/3323873.3325036","title":"Multimodal Dialog for Browsing Large Visual Catalogs using Exploration-Exploitation Paradigm in a Joint Embedding Space","display_name":"Multimodal Dialog for Browsing Large Visual Catalogs using Exploration-Exploitation Paradigm in a Joint Embedding Space","publication_year":2019,"publication_date":"2019-06-05","ids":{"openalex":"https://openalex.org/W2914773199","doi":"https://doi.org/10.1145/3323873.3325036","mag":"2914773199"},"language":"en","primary_location":{"id":"doi:10.1145/3323873.3325036","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3323873.3325036","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3323873.3325036","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2019 on International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3323873.3325036","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5067806540","display_name":"Indrani Bhattacharya","orcid":"https://orcid.org/0000-0003-1557-6961"},"institutions":[{"id":"https://openalex.org/I165799507","display_name":"Rensselaer Polytechnic Institute","ror":"https://ror.org/01rtyzb94","country_code":"US","type":"education","lineage":["https://openalex.org/I165799507"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Indrani Bhattacharya","raw_affiliation_strings":["Rensselaer Polytechnic Institute, Troy, NY, USA"],"affiliations":[{"raw_affiliation_string":"Rensselaer Polytechnic Institute, Troy, NY, USA","institution_ids":["https://openalex.org/I165799507"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001747380","display_name":"Arkabandhu Chowdhury","orcid":null},"institutions":[{"id":"https://openalex.org/I74775410","display_name":"Rice University","ror":"https://ror.org/008zs3103","country_code":"US","type":"education","lineage":["https://openalex.org/I74775410"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Arkabandhu Chowdhury","raw_affiliation_strings":["Rice University, Houston, TX, USA"],"affiliations":[{"raw_affiliation_string":"Rice University, Houston, TX, USA","institution_ids":["https://openalex.org/I74775410"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5053798254","display_name":"Vikas C. Raykar","orcid":null},"institutions":[{"id":"https://openalex.org/I4210103279","display_name":"IBM Research - India","ror":"https://ror.org/014wt7r80","country_code":"IN","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210103279","https://openalex.org/I4210114115"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Vikas C. Raykar","raw_affiliation_strings":["IBM Research, Bangalore, India"],"affiliations":[{"raw_affiliation_string":"IBM Research, Bangalore, India","institution_ids":["https://openalex.org/I4210103279"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5067806540"],"corresponding_institution_ids":["https://openalex.org/I165799507"],"apc_list":null,"apc_paid":null,"fwci":0.3037,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.58304581,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"187","last_page":"191"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9912999868392944,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/dialog-box","display_name":"Dialog box","score":0.8368369340896606},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8363445997238159},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.782341718673706},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5963704586029053},{"id":"https://openalex.org/keywords/cosine-similarity","display_name":"Cosine similarity","score":0.5706063508987427},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5636706948280334},{"id":"https://openalex.org/keywords/dialog-system","display_name":"Dialog system","score":0.4901052415370941},{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.4864489734172821},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.47033366560935974},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.45875880122184753},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.4521194100379944},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.446059912443161},{"id":"https://openalex.org/keywords/joint","display_name":"Joint (building)","score":0.43856000900268555},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.3481599688529968},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.2942490577697754},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.23677119612693787},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.203187495470047}],"concepts":[{"id":"https://openalex.org/C173853756","wikidata":"https://www.wikidata.org/wiki/Q86915","display_name":"Dialog box","level":2,"score":0.8368369340896606},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8363445997238159},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.782341718673706},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5963704586029053},{"id":"https://openalex.org/C2780762811","wikidata":"https://www.wikidata.org/wiki/Q1784941","display_name":"Cosine similarity","level":3,"score":0.5706063508987427},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5636706948280334},{"id":"https://openalex.org/C190954187","wikidata":"https://www.wikidata.org/wiki/Q5270587","display_name":"Dialog system","level":3,"score":0.4901052415370941},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.4864489734172821},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.47033366560935974},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.45875880122184753},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.4521194100379944},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.446059912443161},{"id":"https://openalex.org/C18555067","wikidata":"https://www.wikidata.org/wiki/Q8375051","display_name":"Joint (building)","level":2,"score":0.43856000900268555},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3481599688529968},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.2942490577697754},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.23677119612693787},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.203187495470047},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C170154142","wikidata":"https://www.wikidata.org/wiki/Q150737","display_name":"Architectural engineering","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3323873.3325036","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3323873.3325036","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3323873.3325036","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2019 on International Conference on Multimedia Retrieval","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3323873.3325036","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3323873.3325036","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3323873.3325036","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2019 on International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2914773199.pdf","grobid_xml":"https://content.openalex.org/works/W2914773199.grobid-xml"},"referenced_works_count":17,"referenced_works":["https://openalex.org/W771469340","https://openalex.org/W1686810756","https://openalex.org/W2100142570","https://openalex.org/W2116261113","https://openalex.org/W2168547291","https://openalex.org/W2200092826","https://openalex.org/W2518116149","https://openalex.org/W2547875792","https://openalex.org/W2558809543","https://openalex.org/W2625758617","https://openalex.org/W2783914869","https://openalex.org/W2798451620","https://openalex.org/W2802148124","https://openalex.org/W2897182555","https://openalex.org/W2950577311","https://openalex.org/W2953119472","https://openalex.org/W4249013746"],"related_works":["https://openalex.org/W48079147","https://openalex.org/W2394821827","https://openalex.org/W2563921006","https://openalex.org/W1963944933","https://openalex.org/W1600043506","https://openalex.org/W2111550420","https://openalex.org/W2549666521","https://openalex.org/W3133893348","https://openalex.org/W2920931047","https://openalex.org/W1977846844"],"abstract_inverted_index":{"We":[0,28,56,129],"present":[1],"a":[2,31,36,67,79,85],"multimodal":[3,89],"dialog":[4,72],"(MMD)":[5],"system":[6,112,134,155],"to":[7,21,66,124,171],"assist":[8,125],"online":[9],"customers":[10,20],"in":[11,40,54,84,126],"visually":[12],"browsing":[13,18],"through":[14],"large":[15,143],"catalogs.":[16],"Visual":[17],"allows":[19],"explore":[22],"products":[23,163],"beyond":[24],"exact":[25],"search":[26],"results.":[27],"focus":[29],"on":[30,70,135],"slightly":[32],"asymmetric":[33],"version":[34],"of":[35,60,116,158,169,179],"complete":[37],"MMD":[38,137],"system,":[39],"that":[41,139,153],"our":[42,58],"agent":[43],"can":[44],"understand":[45],"both":[46,104],"text":[47,106],"and":[48,102,107,119,131,148,160,176],"image":[49,109],"queries,":[50],"but":[51],"responds":[52],"only":[53],"images.":[55],"formulate":[57],"problem":[59],"\"showing":[61],"the":[62,71,105,108,114,117,133,154,172],"k":[63],"best":[64],"images":[65],"user'',":[68],"based":[69],"context":[73,115],"so":[74],"far,":[75],"as":[76],"sampling":[77],"from":[78,142],"Gaussian":[80],"Mixture":[81],"Model":[82],"(GMM)":[83],"high":[86],"dimensional":[87],"joint":[88,93],"embedding":[90,94],"space.":[91],"The":[92],"space":[95],"is":[96,156,177],"learned":[97],"by":[98],"Common":[99],"Representation":[100],"Learning":[101],"embeds":[103],"queries.":[110],"Our":[111,146],"remembers":[113],"dialog,":[118],"uses":[120],"an":[121,136,165],"exploration-exploitation":[122],"paradigm":[123],"visual":[127],"browsing.":[128],"train":[130],"evaluate":[132],"dataset":[138],"we":[140],"synthesize":[141],"catalog":[144],"data.":[145],"experiments":[147],"preliminary":[149],"human":[150,181],"evaluation":[151],"show":[152],"capable":[157,178],"learning":[159],"displaying":[161],"relevant":[162],"with":[164],"average":[166],"cosine":[167],"similarity":[168],"0.85":[170],"ground":[173],"truth":[174],"results,":[175],"engaging":[180],"users.":[182]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2021,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
