{"id":"https://openalex.org/W3203982370","doi":"https://doi.org/10.1109/ijcnn55064.2022.9892863","title":"Audio-to-Image Cross-Modal Generation","display_name":"Audio-to-Image Cross-Modal Generation","publication_year":2022,"publication_date":"2022-07-18","ids":{"openalex":"https://openalex.org/W3203982370","doi":"https://doi.org/10.1109/ijcnn55064.2022.9892863","mag":"3203982370"},"language":"en","primary_location":{"id":"doi:10.1109/ijcnn55064.2022.9892863","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn55064.2022.9892863","pdf_url":null,"source":{"id":"https://openalex.org/S4363607707","display_name":"2022 International Joint Conference on Neural Networks (IJCNN)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5034675385","display_name":"Maciej \u017belaszczyk","orcid":null},"institutions":[{"id":"https://openalex.org/I108403487","display_name":"Warsaw University of Technology","ror":"https://ror.org/00y0xnp53","country_code":"PL","type":"education","lineage":["https://openalex.org/I108403487"]},{"id":"https://openalex.org/I4210136912","display_name":"Wy\u017csza Szko\u0142a Technologii Informatycznych w Warszawie","ror":"https://ror.org/03v1svm63","country_code":"PL","type":"education","lineage":["https://openalex.org/I4210136912"]}],"countries":["PL"],"is_corresponding":false,"raw_author_name":"Maciej Zelaszczyk","raw_affiliation_strings":["Science Warsaw University of Technology,Faculty of Mathematics and Information,Warsaw,Poland","Faculty of Mathematics and Information, Science Warsaw University of Technology, Warsaw, Poland"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Science Warsaw University of Technology,Faculty of Mathematics and Information,Warsaw,Poland","institution_ids":["https://openalex.org/I4210136912","https://openalex.org/I108403487"]},{"raw_affiliation_string":"Faculty of Mathematics and Information, Science Warsaw University of Technology, Warsaw, Poland","institution_ids":["https://openalex.org/I108403487"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5073814691","display_name":"Jacek Ma\u0144dziuk","orcid":"https://orcid.org/0000-0003-0947-028X"},"institutions":[{"id":"https://openalex.org/I108403487","display_name":"Warsaw University of Technology","ror":"https://ror.org/00y0xnp53","country_code":"PL","type":"education","lineage":["https://openalex.org/I108403487"]},{"id":"https://openalex.org/I4210136912","display_name":"Wy\u017csza Szko\u0142a Technologii Informatycznych w Warszawie","ror":"https://ror.org/03v1svm63","country_code":"PL","type":"education","lineage":["https://openalex.org/I4210136912"]}],"countries":["PL"],"is_corresponding":false,"raw_author_name":"Jacek Mandziuk","raw_affiliation_strings":["Science Warsaw University of Technology,Faculty of Mathematics and Information,Warsaw,Poland","Faculty of Mathematics and Information, Science Warsaw University of Technology, Warsaw, Poland"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Science Warsaw University of Technology,Faculty of Mathematics and Information,Warsaw,Poland","institution_ids":["https://openalex.org/I4210136912","https://openalex.org/I108403487"]},{"raw_affiliation_string":"Faculty of Mathematics and Information, Science Warsaw University of Technology, Warsaw, Poland","institution_ids":["https://openalex.org/I108403487"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.3314,"has_fulltext":false,"cited_by_count":13,"citation_normalized_percentile":{"value":0.82247403,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9890999794006348,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9796000123023987,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7472753524780273},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.6757903099060059},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.640551745891571},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.60310298204422},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5915287733078003},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.5597739815711975},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5553686022758484},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.5546963810920715},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.5227141976356506},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.5158116817474365},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.5029537081718445},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.4910428524017334},{"id":"https://openalex.org/keywords/adversarial-system","display_name":"Adversarial system","score":0.4500942826271057},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3541247844696045},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.33665087819099426},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1098368763923645}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7472753524780273},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.6757903099060059},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.640551745891571},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.60310298204422},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5915287733078003},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.5597739815711975},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5553686022758484},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.5546963810920715},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.5227141976356506},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.5158116817474365},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.5029537081718445},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.4910428524017334},{"id":"https://openalex.org/C37736160","wikidata":"https://www.wikidata.org/wiki/Q1801315","display_name":"Adversarial system","level":2,"score":0.4500942826271057},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3541247844696045},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.33665087819099426},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1098368763923645},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn55064.2022.9892863","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn55064.2022.9892863","pdf_url":null,"source":{"id":"https://openalex.org/S4363607707","display_name":"2022 International Joint Conference on Neural Networks (IJCNN)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":64,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1959608418","https://openalex.org/W1994618660","https://openalex.org/W2017257315","https://openalex.org/W2025768430","https://openalex.org/W2042492924","https://openalex.org/W2099471712","https://openalex.org/W2102409316","https://openalex.org/W2102765684","https://openalex.org/W2112796928","https://openalex.org/W2122538988","https://openalex.org/W2163922914","https://openalex.org/W2405756170","https://openalex.org/W2504108613","https://openalex.org/W2519091744","https://openalex.org/W2752796333","https://openalex.org/W2766736793","https://openalex.org/W2785678896","https://openalex.org/W2805122419","https://openalex.org/W2883222475","https://openalex.org/W2893749619","https://openalex.org/W2912225506","https://openalex.org/W2949382160","https://openalex.org/W2951237705","https://openalex.org/W2952558884","https://openalex.org/W2963066677","https://openalex.org/W2963115079","https://openalex.org/W2963265008","https://openalex.org/W2963663420","https://openalex.org/W2963684088","https://openalex.org/W2963799213","https://openalex.org/W2963836885","https://openalex.org/W2964121744","https://openalex.org/W2966715458","https://openalex.org/W2970608575","https://openalex.org/W2970652125","https://openalex.org/W2970873268","https://openalex.org/W2970941416","https://openalex.org/W2971074500","https://openalex.org/W2981851019","https://openalex.org/W2987472543","https://openalex.org/W3012090677","https://openalex.org/W3046890131","https://openalex.org/W3089177030","https://openalex.org/W3123798147","https://openalex.org/W3215180973","https://openalex.org/W4214705877","https://openalex.org/W4288573225","https://openalex.org/W4294491235","https://openalex.org/W4320013936","https://openalex.org/W6631190155","https://openalex.org/W6640963894","https://openalex.org/W6675401909","https://openalex.org/W6685352114","https://openalex.org/W6713645886","https://openalex.org/W6748582592","https://openalex.org/W6752791795","https://openalex.org/W6759807521","https://openalex.org/W6762931180","https://openalex.org/W6766904570","https://openalex.org/W6767441326","https://openalex.org/W6767513282","https://openalex.org/W6779669310","https://openalex.org/W6804832055"],"related_works":["https://openalex.org/W73545470","https://openalex.org/W4224266612","https://openalex.org/W2383394264","https://openalex.org/W4320153225","https://openalex.org/W4293261942","https://openalex.org/W3125968744","https://openalex.org/W2004831463","https://openalex.org/W2110287964","https://openalex.org/W4307407935","https://openalex.org/W649759291"],"abstract_inverted_index":{"Cross-modal":[0],"representation":[1],"learning":[2],"allows":[3],"to":[4,22,66,71,89],"integrate":[5],"information":[6],"from":[7,75],"different":[8],"modalities":[9],"into":[10],"one":[11,51],"representation.":[12],"At":[13],"the":[14,25,41,54,64,94,105,110,121,135,138],"same":[15],"time,":[16],"research":[17],"on":[18,24,31],"generative":[19,55],"models":[20],"tends":[21],"focus":[23],"visual":[26],"domain":[27],"with":[28],"less":[29],"emphasis":[30],"other":[32],"domains,":[33],"such":[34],"as":[35],"audio":[36,76],"or":[37,125],"text,":[38],"potentially":[39],"missing":[40],"benefits":[42],"of":[43,109],"shared":[44],"representations.":[45],"Studies":[46],"successfully":[47],"linking":[48],"more":[49,91],"than":[50],"modality":[52],"in":[53,82,87,93,134],"setting":[56],"are":[57,141,147,153],"rare.":[58],"In":[59],"this":[60,114],"context,":[61],"we":[62,79],"verify":[63],"possibility":[65],"train":[67],"variational":[68],"autoencoders":[69],"(VAEs)":[70],"reconstruct":[72],"image":[73,151],"archetypes":[74],"data.":[77],"Specifically,":[78],"consider":[80],"VAEs":[81],"an":[83],"adversarial":[84],"training":[85],"framework":[86],"order":[88],"ensure":[90],"variability":[92],"generated":[95,111,139],"data":[96],"and":[97,107],"find":[98],"that":[99,132,146],"there":[100],"is":[101],"a":[102],"trade-off":[103,115],"between":[104],"consistency":[106],"diversity":[108],"images":[112,140],"-":[113],"can":[116],"be":[117],"governed":[118],"by":[119],"scaling":[120],"reconstruction":[122],"loss":[123],"up":[124],"down,":[126],"respectively.":[127],"Our":[128],"results":[129],"further":[130],"suggest":[131],"even":[133],"case":[136],"when":[137],"relatively":[142],"inconsistent":[143],"(diverse),":[144],"features":[145],"critical":[148],"for":[149],"proper":[150],"classification":[152],"preserved.":[154]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
