{"id":"https://openalex.org/W2755245431","doi":"https://doi.org/10.18653/v1/w17-3509","title":"Refer-iTTS: A System for Referring in Spoken Installments to Objects in Real-World Images","display_name":"Refer-iTTS: A System for Referring in Spoken Installments to Objects in Real-World Images","publication_year":2017,"publication_date":"2017-01-01","ids":{"openalex":"https://openalex.org/W2755245431","doi":"https://doi.org/10.18653/v1/w17-3509","mag":"2755245431"},"language":"en","primary_location":{"id":"doi:10.18653/v1/w17-3509","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w17-3509","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 10th International Conference on Natural Language Generation","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.18653/v1/w17-3509","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5078051602","display_name":"Sina Zarrie\u00df","orcid":"https://orcid.org/0000-0002-1384-1218"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sina Zarrie\u00df","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049465032","display_name":"Maria Soledad Lopez Gambino","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"M. Soledad L\u00f3pez Gambino","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5032801642","display_name":"David Schlangen","orcid":"https://orcid.org/0000-0002-2686-6887"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"David Schlangen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.13672792,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"72","last_page":"73"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8197927474975586},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.6596563458442688},{"id":"https://openalex.org/keywords/generator","display_name":"Generator (circuit theory)","score":0.5910512208938599},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.5348652005195618},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.46272578835487366},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4620354473590851},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4471490681171417},{"id":"https://openalex.org/keywords/interface","display_name":"Interface (matter)","score":0.4161451756954193}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8197927474975586},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.6596563458442688},{"id":"https://openalex.org/C2780992000","wikidata":"https://www.wikidata.org/wiki/Q17016113","display_name":"Generator (circuit theory)","level":3,"score":0.5910512208938599},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.5348652005195618},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.46272578835487366},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4620354473590851},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4471490681171417},{"id":"https://openalex.org/C113843644","wikidata":"https://www.wikidata.org/wiki/Q901882","display_name":"Interface (matter)","level":4,"score":0.4161451756954193},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.0},{"id":"https://openalex.org/C129307140","wikidata":"https://www.wikidata.org/wiki/Q6795880","display_name":"Maximum bubble pressure method","level":3,"score":0.0},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.0},{"id":"https://openalex.org/C157915830","wikidata":"https://www.wikidata.org/wiki/Q2928001","display_name":"Bubble","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.18653/v1/w17-3509","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w17-3509","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 10th International Conference on Natural Language Generation","raw_type":"proceedings-article"},{"id":"pmh:oai:pub.librecat.org:2913606","is_oa":true,"landing_page_url":"https://pub.uni-bielefeld.de/record/2913606","pdf_url":null,"source":{"id":"https://openalex.org/S4306401671","display_name":"PUB \u2013 Publications at Bielefeld University (Bielefeld University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I20121455","host_organization_name":"Bielefeld University","host_organization_lineage":["https://openalex.org/I20121455"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Zarrie\u00df S, Schlangen D. Refer-iTTS: A System for Referring in Spoken Installments to Objects in Real-World Images. In:  &lt;em&gt;Proceedings of INLG 2017 (demo papers)&lt;/em&gt;. 2017.","raw_type":"info:eu-repo/semantics/conferenceObject"}],"best_oa_location":{"id":"doi:10.18653/v1/w17-3509","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w17-3509","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 10th International Conference on Natural Language Generation","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.5600000023841858,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":10,"referenced_works":["https://openalex.org/W160853254","https://openalex.org/W1941338968","https://openalex.org/W1973933596","https://openalex.org/W2039207728","https://openalex.org/W2110930288","https://openalex.org/W2142813692","https://openalex.org/W2251512949","https://openalex.org/W2295431719","https://openalex.org/W2404187851","https://openalex.org/W2508818207"],"related_works":["https://openalex.org/W17155033","https://openalex.org/W3207760230","https://openalex.org/W1496222301","https://openalex.org/W1590307681","https://openalex.org/W4312814274","https://openalex.org/W4285370786","https://openalex.org/W2296488620","https://openalex.org/W2358353312","https://openalex.org/W2353836703","https://openalex.org/W41015297"],"abstract_inverted_index":{"Current":[0],"referring":[1,23,68],"expression":[2],"generation":[3,19],"systems":[4],"mostly":[5],"deliver":[6],"their":[7],"output":[8],"as":[9],"one-shot,":[10],"written":[11],"expressions.":[12],"We":[13,41],"present":[14],"on-going":[15],"work":[16,34],"on":[17],"incremental":[18,47,60],"of":[20],"spoken":[21],"expressions":[22,69],"to":[24,59],"objects":[25],"in":[26,45],"real-world":[27],"images.":[28],"This":[29],"approach":[30],"extends":[31],"upon":[32],"previous":[33],"using":[35],"the":[36],"words-as-classifier":[37],"model":[38],"for":[39],"generation.":[40],"implement":[42],"this":[43],"generator":[44],"an":[46,56],"dialogue":[48],"processing":[49],"framework":[50],"such":[51],"that":[52],"we":[53],"can":[54],"exploit":[55],"existing":[57],"interface":[58],"text-to-speech":[61],"synthesis.":[62],"Our":[63],"system":[64],"generates":[65],"and":[66],"synthesizes":[67],"while":[70],"continuously":[71],"observing":[72],"non-verbal":[73],"user":[74],"reactions.":[75]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
