{"id":"https://openalex.org/W3015215494","doi":"https://doi.org/10.1109/jstsp.2020.2987417","title":"Direct Speech-to-Image Translation","display_name":"Direct Speech-to-Image Translation","publication_year":2020,"publication_date":"2020-03-01","ids":{"openalex":"https://openalex.org/W3015215494","doi":"https://doi.org/10.1109/jstsp.2020.2987417","mag":"3015215494"},"language":"en","primary_location":{"id":"doi:10.1109/jstsp.2020.2987417","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jstsp.2020.2987417","pdf_url":null,"source":{"id":"https://openalex.org/S42167783","display_name":"IEEE Journal of Selected Topics in Signal Processing","issn_l":"1932-4553","issn":["1932-4553","1941-0484"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Journal of Selected Topics in Signal Processing","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2004.03413","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103173334","display_name":"Jiguo Li","orcid":"https://orcid.org/0000-0002-1447-4798"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]},{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jiguo Li","raw_affiliation_strings":["Key Lab of Intelligent Information Processing, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China","National Engineering Laboratory for Video Technology, School of Electronic Engineering and Computer Science, Peking University, Beijing, China","University of Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Key Lab of Intelligent Information Processing, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]},{"raw_affiliation_string":"National Engineering Laboratory for Video Technology, School of Electronic Engineering and Computer Science, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]},{"raw_affiliation_string":"University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055937409","display_name":"Xinfeng Zhang","orcid":"https://orcid.org/0000-0002-7517-3868"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xinfeng Zhang","raw_affiliation_strings":["School of Computer Science and Technology, University of Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040292663","display_name":"Chuanmin Jia","orcid":"https://orcid.org/0000-0002-7418-6245"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]},{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chuanmin Jia","raw_affiliation_strings":["Institute of Digital Media, School of Electronic Engineering and Computer Science, Peking University, Beijing, China","Peng Cheng Lab, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Institute of Digital Media, School of Electronic Engineering and Computer Science, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]},{"raw_affiliation_string":"Peng Cheng Lab, Shenzhen, China","institution_ids":["https://openalex.org/I4210136793"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101727196","display_name":"Jizheng Xu","orcid":"https://orcid.org/0009-0005-4563-6787"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jizheng Xu","raw_affiliation_strings":["Bytedance Inc., Beijing, China"],"affiliations":[{"raw_affiliation_string":"Bytedance Inc., Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100425726","display_name":"Li Zhang","orcid":"https://orcid.org/0000-0003-2118-4876"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li Zhang","raw_affiliation_strings":["Bytedance Inc., Beijing, China"],"affiliations":[{"raw_affiliation_string":"Bytedance Inc., Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100701548","display_name":"Yue Wang","orcid":"https://orcid.org/0000-0001-8740-1592"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yue Wang","raw_affiliation_strings":["Bytedance Inc., Beijing, China"],"affiliations":[{"raw_affiliation_string":"Bytedance Inc., Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039832462","display_name":"Siwei Ma","orcid":"https://orcid.org/0000-0002-2731-5403"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]},{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Siwei Ma","raw_affiliation_strings":["Institute of Digital Media, School of Electronic Engineering and Computer Science, Peking University, Beijing, China","Peng Cheng Lab, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Institute of Digital Media, School of Electronic Engineering and Computer Science, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]},{"raw_affiliation_string":"Peng Cheng Lab, Shenzhen, China","institution_ids":["https://openalex.org/I4210136793"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5018478553","display_name":"Wen Gao","orcid":"https://orcid.org/0000-0002-8070-802X"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]},{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wen Gao","raw_affiliation_strings":["Institute of Digital Media, School of Electronic Engineering and Computer Science, Peking University, Beijing, China","Peng Cheng Lab, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Institute of Digital Media, School of Electronic Engineering and Computer Science, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]},{"raw_affiliation_string":"Peng Cheng Lab, Shenzhen, China","institution_ids":["https://openalex.org/I4210136793"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5103173334"],"corresponding_institution_ids":["https://openalex.org/I19820366","https://openalex.org/I20231570","https://openalex.org/I4210090176","https://openalex.org/I4210165038"],"apc_list":null,"apc_paid":null,"fwci":2.7479,"has_fulltext":false,"cited_by_count":41,"citation_normalized_percentile":{"value":0.9196399,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":"14","issue":"3","first_page":"517","last_page":"529"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9900000095367432,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.989300012588501,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8360815644264221},{"id":"https://openalex.org/keywords/image-translation","display_name":"Image translation","score":0.638757586479187},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6362683773040771},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.5747260451316833},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5686410665512085},{"id":"https://openalex.org/keywords/direct-speech","display_name":"Direct speech","score":0.5350445508956909},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.49386197328567505},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.4847114086151123},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.48027896881103516},{"id":"https://openalex.org/keywords/transcription","display_name":"Transcription (linguistics)","score":0.4625418186187744},{"id":"https://openalex.org/keywords/translation","display_name":"Translation (biology)","score":0.4536038637161255},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.42441269755363464},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4221249222755432},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.4154198467731476},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.4111623764038086},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.404948353767395},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.37178146839141846}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8360815644264221},{"id":"https://openalex.org/C2779757391","wikidata":"https://www.wikidata.org/wiki/Q6002292","display_name":"Image translation","level":3,"score":0.638757586479187},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6362683773040771},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.5747260451316833},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5686410665512085},{"id":"https://openalex.org/C51764019","wikidata":"https://www.wikidata.org/wiki/Q283987","display_name":"Direct speech","level":2,"score":0.5350445508956909},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.49386197328567505},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.4847114086151123},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.48027896881103516},{"id":"https://openalex.org/C179926584","wikidata":"https://www.wikidata.org/wiki/Q207714","display_name":"Transcription (linguistics)","level":2,"score":0.4625418186187744},{"id":"https://openalex.org/C149364088","wikidata":"https://www.wikidata.org/wiki/Q185917","display_name":"Translation (biology)","level":4,"score":0.4536038637161255},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.42441269755363464},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4221249222755432},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.4154198467731476},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.4111623764038086},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.404948353767395},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.37178146839141846},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C105580179","wikidata":"https://www.wikidata.org/wiki/Q188928","display_name":"Messenger RNA","level":3,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/jstsp.2020.2987417","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jstsp.2020.2987417","pdf_url":null,"source":{"id":"https://openalex.org/S42167783","display_name":"IEEE Journal of Selected Topics in Signal Processing","issn_l":"1932-4553","issn":["1932-4553","1941-0484"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Journal of Selected Topics in Signal Processing","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:2004.03413","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2004.03413","pdf_url":"https://arxiv.org/pdf/2004.03413","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2004.03413","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2004.03413","pdf_url":"https://arxiv.org/pdf/2004.03413","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.8700000047683716,"id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G3634038634","display_name":null,"funder_award_id":"61632001","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5136785144","display_name":null,"funder_award_id":"61961130392","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":88,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W1797268635","https://openalex.org/W1821462560","https://openalex.org/W1861492603","https://openalex.org/W1899504021","https://openalex.org/W1922655562","https://openalex.org/W1970208997","https://openalex.org/W1989549063","https://openalex.org/W2009475852","https://openalex.org/W2062955551","https://openalex.org/W2063303346","https://openalex.org/W2078179989","https://openalex.org/W2097117768","https://openalex.org/W2099471712","https://openalex.org/W2102605133","https://openalex.org/W2108598243","https://openalex.org/W2125389028","https://openalex.org/W2134670479","https://openalex.org/W2134797427","https://openalex.org/W2143612262","https://openalex.org/W2150856297","https://openalex.org/W2193413348","https://openalex.org/W2194775991","https://openalex.org/W2237426222","https://openalex.org/W2296681920","https://openalex.org/W2334493732","https://openalex.org/W2398118205","https://openalex.org/W2405756170","https://openalex.org/W2466918907","https://openalex.org/W2556930864","https://openalex.org/W2578392894","https://openalex.org/W2580178245","https://openalex.org/W2582956876","https://openalex.org/W2591927543","https://openalex.org/W2619368999","https://openalex.org/W2766812927","https://openalex.org/W2796315435","https://openalex.org/W2799473636","https://openalex.org/W2911629330","https://openalex.org/W2916979304","https://openalex.org/W2919115771","https://openalex.org/W2922538097","https://openalex.org/W2949328740","https://openalex.org/W2962753610","https://openalex.org/W2962793481","https://openalex.org/W2962824709","https://openalex.org/W2962862718","https://openalex.org/W2963066677","https://openalex.org/W2963073614","https://openalex.org/W2963163163","https://openalex.org/W2963311389","https://openalex.org/W2963330681","https://openalex.org/W2963373786","https://openalex.org/W2963663420","https://openalex.org/W2963691546","https://openalex.org/W2963902314","https://openalex.org/W2963966654","https://openalex.org/W2963981733","https://openalex.org/W2964024144","https://openalex.org/W2964281804","https://openalex.org/W2964337551","https://openalex.org/W2972495969","https://openalex.org/W2979157532","https://openalex.org/W3101648800","https://openalex.org/W3138304788","https://openalex.org/W3201409833","https://openalex.org/W4234079003","https://openalex.org/W4296979096","https://openalex.org/W4297606427","https://openalex.org/W4301206121","https://openalex.org/W4320013936","https://openalex.org/W6631636882","https://openalex.org/W6638319203","https://openalex.org/W6638523607","https://openalex.org/W6639102338","https://openalex.org/W6640090968","https://openalex.org/W6678815747","https://openalex.org/W6679792166","https://openalex.org/W6679909955","https://openalex.org/W6687566353","https://openalex.org/W6697456849","https://openalex.org/W6713645886","https://openalex.org/W6718379498","https://openalex.org/W6729977899","https://openalex.org/W6745983426","https://openalex.org/W6749489859","https://openalex.org/W6761157444","https://openalex.org/W6765779288"],"related_works":["https://openalex.org/W4390516098","https://openalex.org/W2181948922","https://openalex.org/W2384362569","https://openalex.org/W2142795561","https://openalex.org/W4205302943","https://openalex.org/W2561132942","https://openalex.org/W3155418658","https://openalex.org/W4243199227","https://openalex.org/W2950240247","https://openalex.org/W3015215494"],"abstract_inverted_index":{"Direct":[0],"speech-to-image":[1],"translation":[2],"without":[3,75,155],"text":[4,158],"is":[5,83,97,121,145],"an":[6,92],"interesting":[7],"and":[8,54,95,137],"useful":[9],"topic":[10],"due":[11],"to":[12,25,46,66,85,107,123,147],"the":[13,48,68,72,76,87,129,149,156],"potential":[14],"applications":[15],"in":[16],"human-computer":[17],"interaction,":[18],"art":[19],"creation,":[20],"computer-aided":[21],"design.":[22],"etc.":[23],"Not":[24],"mention":[26],"that":[27,141],"many":[28],"languages":[29],"have":[30],"no":[31],"writing":[32],"form.":[33],"However,":[34],"as":[35,37,91],"far":[36],"we":[38,64],"know,":[39],"it":[40,96],"has":[41],"not":[42],"been":[43],"well-studied":[44],"how":[45,55],"translate":[47,67,148],"speech":[49,69,81,89,151],"signals":[50,70,74,90,152],"into":[51,71,153],"images":[52,126,154],"directly":[53],"well":[56],"they":[57],"can":[58],"be":[59],"translated.":[60],"In":[61],"this":[62],"paper,":[63],"attempt":[65],"image":[73,102],"transcription":[77],"stage.":[78],"Specifically,":[79],"a":[80,100,116],"encoder":[82,103],"designed":[84],"represent":[86],"input":[88],"embedding":[93,130],"feature,":[94],"trained":[98],"with":[99],"pretrained":[101],"using":[104],"teacher-student":[105],"learning":[106],"obtain":[108],"better":[109],"generalization":[110],"ability":[111],"on":[112,128,134],"new":[113],"classes.":[114],"Subsequently,":[115],"stacked":[117],"generative":[118],"adversarial":[119],"network":[120],"used":[122],"synthesize":[124],"high-quality":[125],"conditioned":[127],"feature.":[131],"Experimental":[132],"results":[133],"both":[135],"synthesized":[136],"real":[138],"data":[139],"show":[140],"our":[142,166],"proposed":[143],"method":[144],"effective":[146],"raw":[150],"middle":[157],"representation.":[159],"Ablation":[160],"study":[161],"gives":[162],"more":[163],"insights":[164],"about":[165],"method.":[167]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":8},{"year":2023,"cited_by_count":15},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":7},{"year":2020,"cited_by_count":2}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
