{"id":"https://openalex.org/W2104247845","doi":"https://doi.org/10.1109/slt.2010.5700813","title":"Learning from images and speech with Non-negative Matrix Factorization enhanced by input space scaling","display_name":"Learning from images and speech with Non-negative Matrix Factorization enhanced by input space scaling","publication_year":2010,"publication_date":"2010-12-01","ids":{"openalex":"https://openalex.org/W2104247845","doi":"https://doi.org/10.1109/slt.2010.5700813","mag":"2104247845"},"language":"en","primary_location":{"id":"doi:10.1109/slt.2010.5700813","is_oa":false,"landing_page_url":"https://doi.org/10.1109/slt.2010.5700813","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2010 IEEE Spoken Language Technology Workshop","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://lirias.kuleuven.be/handle/123456789/291537","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103936265","display_name":"Joris Driesen","orcid":null},"institutions":[{"id":"https://openalex.org/I99464096","display_name":"KU Leuven","ror":"https://ror.org/05f950310","country_code":"BE","type":"education","lineage":["https://openalex.org/I99464096"]}],"countries":["BE"],"is_corresponding":true,"raw_author_name":"Joris Driesen","raw_affiliation_strings":["Department ESAT-PSI, K.U. Leuven, Leuven, Belgium","Dept. ESAT-PSI, K.U.Leuven, Belgium"],"affiliations":[{"raw_affiliation_string":"Department ESAT-PSI, K.U. Leuven, Leuven, Belgium","institution_ids":["https://openalex.org/I99464096"]},{"raw_affiliation_string":"Dept. ESAT-PSI, K.U.Leuven, Belgium","institution_ids":["https://openalex.org/I99464096"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087514947","display_name":"Hugo Van hamme","orcid":"https://orcid.org/0000-0003-1331-5186"},"institutions":[{"id":"https://openalex.org/I99464096","display_name":"KU Leuven","ror":"https://ror.org/05f950310","country_code":"BE","type":"education","lineage":["https://openalex.org/I99464096"]}],"countries":["BE"],"is_corresponding":false,"raw_author_name":"Hugo Van hamme","raw_affiliation_strings":["Department ESAT-PSI, K.U. Leuven, Leuven, Belgium","Dept. ESAT-PSI, K.U.Leuven, Belgium"],"affiliations":[{"raw_affiliation_string":"Department ESAT-PSI, K.U. Leuven, Leuven, Belgium","institution_ids":["https://openalex.org/I99464096"]},{"raw_affiliation_string":"Dept. ESAT-PSI, K.U.Leuven, Belgium","institution_ids":["https://openalex.org/I99464096"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5087492771","display_name":"W. Bastiaan Kleijn","orcid":"https://orcid.org/0000-0002-1973-3920"},"institutions":[{"id":"https://openalex.org/I86987016","display_name":"KTH Royal Institute of Technology","ror":"https://ror.org/026vcq606","country_code":"SE","type":"education","lineage":["https://openalex.org/I86987016"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"W. Bastiaan Kleijn","raw_affiliation_strings":["Sound and Image Processing Laboratory, KTH Royal Institute of Technology, Stockholm, Sweden","Sound and Image Processing Laboratory, KTH, Stockholm, Sweden"],"affiliations":[{"raw_affiliation_string":"Sound and Image Processing Laboratory, KTH Royal Institute of Technology, Stockholm, Sweden","institution_ids":["https://openalex.org/I86987016"]},{"raw_affiliation_string":"Sound and Image Processing Laboratory, KTH, Stockholm, Sweden","institution_ids":["https://openalex.org/I86987016"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5103936265"],"corresponding_institution_ids":["https://openalex.org/I99464096"],"apc_list":null,"apc_paid":null,"fwci":1.3531,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.84836046,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/non-negative-matrix-factorization","display_name":"Non-negative matrix factorization","score":0.8355286717414856},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7931911945343018},{"id":"https://openalex.org/keywords/latent-dirichlet-allocation","display_name":"Latent Dirichlet allocation","score":0.7913116216659546},{"id":"https://openalex.org/keywords/matrix-decomposition","display_name":"Matrix decomposition","score":0.7631773948669434},{"id":"https://openalex.org/keywords/probabilistic-latent-semantic-analysis","display_name":"Probabilistic latent semantic analysis","score":0.6464575529098511},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5677278637886047},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.518149733543396},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.4492892920970917},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.4449020028114319},{"id":"https://openalex.org/keywords/topic-model","display_name":"Topic model","score":0.39544469118118286},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.37620699405670166},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.36355000734329224},{"id":"https://openalex.org/keywords/eigenvalues-and-eigenvectors","display_name":"Eigenvalues and eigenvectors","score":0.09811177849769592}],"concepts":[{"id":"https://openalex.org/C152671427","wikidata":"https://www.wikidata.org/wiki/Q10843505","display_name":"Non-negative matrix factorization","level":4,"score":0.8355286717414856},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7931911945343018},{"id":"https://openalex.org/C500882744","wikidata":"https://www.wikidata.org/wiki/Q269236","display_name":"Latent Dirichlet allocation","level":3,"score":0.7913116216659546},{"id":"https://openalex.org/C42355184","wikidata":"https://www.wikidata.org/wiki/Q1361088","display_name":"Matrix decomposition","level":3,"score":0.7631773948669434},{"id":"https://openalex.org/C112933361","wikidata":"https://www.wikidata.org/wiki/Q2845258","display_name":"Probabilistic latent semantic analysis","level":2,"score":0.6464575529098511},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5677278637886047},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.518149733543396},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.4492892920970917},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.4449020028114319},{"id":"https://openalex.org/C171686336","wikidata":"https://www.wikidata.org/wiki/Q3532085","display_name":"Topic model","level":2,"score":0.39544469118118286},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.37620699405670166},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.36355000734329224},{"id":"https://openalex.org/C158693339","wikidata":"https://www.wikidata.org/wiki/Q190524","display_name":"Eigenvalues and eigenvectors","level":2,"score":0.09811177849769592},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/slt.2010.5700813","is_oa":false,"landing_page_url":"https://doi.org/10.1109/slt.2010.5700813","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2010 IEEE Spoken Language Technology Workshop","raw_type":"proceedings-article"},{"id":"pmh:oai:lirias2repo.kuleuven.be:123456789/291537","is_oa":true,"landing_page_url":"https://lirias.kuleuven.be/handle/123456789/291537","pdf_url":null,"source":{"id":"https://openalex.org/S4306401954","display_name":"Lirias (KU Leuven)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I99464096","host_organization_name":"KU Leuven","host_organization_lineage":["https://openalex.org/I99464096"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE workshop on spoken language technology - SLT 2010, Berkeley, California, USA, 12-15 December 2010","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":{"id":"pmh:oai:lirias2repo.kuleuven.be:123456789/291537","is_oa":true,"landing_page_url":"https://lirias.kuleuven.be/handle/123456789/291537","pdf_url":null,"source":{"id":"https://openalex.org/S4306401954","display_name":"Lirias (KU Leuven)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I99464096","host_organization_name":"KU Leuven","host_organization_lineage":["https://openalex.org/I99464096"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE workshop on spoken language technology - SLT 2010, Berkeley, California, USA, 12-15 December 2010","raw_type":"info:eu-repo/semantics/publishedVersion"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W176887922","https://openalex.org/W189630165","https://openalex.org/W1576445103","https://openalex.org/W1603096025","https://openalex.org/W1605557114","https://openalex.org/W1612003148","https://openalex.org/W1880262756","https://openalex.org/W1902027874","https://openalex.org/W2000359198","https://openalex.org/W2028080169","https://openalex.org/W2066941820","https://openalex.org/W2073857938","https://openalex.org/W2124386111","https://openalex.org/W2136851074","https://openalex.org/W2527623718","https://openalex.org/W6607207477","https://openalex.org/W6634343353","https://openalex.org/W6636123748","https://openalex.org/W6639619044"],"related_works":["https://openalex.org/W1551384396","https://openalex.org/W2888805565","https://openalex.org/W2096865229","https://openalex.org/W2921491680","https://openalex.org/W2251863249","https://openalex.org/W4291700620","https://openalex.org/W2132052677","https://openalex.org/W3159709618","https://openalex.org/W4396666968","https://openalex.org/W2110027950"],"abstract_inverted_index":{"Computional":[0],"learning":[1],"from":[2],"multimodal":[3],"data":[4,55,64],"is":[5,57],"often":[6],"done":[7],"with":[8],"matrix":[9],"factorization":[10],"techniques":[11],"such":[12,53],"as":[13],"NMF":[14],"(Non-negative":[15],"Matrix":[16],"Factorization),":[17],"pLSA":[18],"(Probabilistic":[19],"Latent":[20],"Semantic":[21],"Analysis)":[22],"or":[23],"LDA":[24],"(Latent":[25],"Dirichlet":[26],"Allocation).":[27],"The":[28],"different":[29],"modalities":[30],"of":[31,52,62,99],"the":[32,68,97],"input":[33],"are":[34,42],"to":[35,104],"this":[36,71,100],"end":[37],"converted":[38],"into":[39],"features":[40,65],"that":[41,58,95],"easily":[43],"placed":[44],"in":[45,102],"a":[46,54,60,76,92],"vectorized":[47],"format.":[48],"An":[49],"inherent":[50],"weakness":[51],"representation":[56],"only":[59],"subset":[61],"these":[63],"actually":[66],"aids":[67],"learning.":[69],"In":[70],"paper,":[72],"we":[73],"first":[74],"describe":[75],"simple":[77],"NMF-based":[78],"recognition":[79,107],"framework":[80,101],"operating":[81],"on":[82],"speech":[83],"and":[84,90],"image":[85],"data.":[86],"We":[87],"then":[88],"propose":[89],"demonstrate":[91],"novel":[93],"algorithm":[94],"scales":[96],"inputs":[98],"order":[103],"optimize":[105],"its":[106],"performance.":[108]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2015,"cited_by_count":1},{"year":2013,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
