{"id":"https://openalex.org/W111747841","doi":"https://doi.org/10.21437/eurospeech.1997-483","title":"Issues in database creation: recording new populations, faster and better labelling","display_name":"Issues in database creation: recording new populations, faster and better labelling","publication_year":1997,"publication_date":"1997-09-22","ids":{"openalex":"https://openalex.org/W111747841","doi":"https://doi.org/10.21437/eurospeech.1997-483","mag":"111747841"},"language":"en","primary_location":{"id":"doi:10.21437/eurospeech.1997-483","is_oa":false,"landing_page_url":"https://doi.org/10.21437/eurospeech.1997-483","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"5th European Conference on Speech Communication and Technology (Eurospeech 1997)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5077285164","display_name":"Maxine Esk\u00e9nazi","orcid":null},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Maxine Eskenazi","raw_affiliation_strings":["Carnegie Mellon University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045107570","display_name":"Christopher J. Hogan","orcid":"https://orcid.org/0000-0001-7655-4980"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"C. Hogan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113713422","display_name":"J. Allen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"J. Allen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5111890705","display_name":"Robert Frederking","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"R. Frederking","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.6046,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.73093553,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"1699","last_page":"1702"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9799000024795532,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9799000024795532,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7699724435806274},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.530102014541626},{"id":"https://openalex.org/keywords/population","display_name":"Population","score":0.527032196521759},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.5167995691299438},{"id":"https://openalex.org/keywords/porting","display_name":"Porting","score":0.5167855024337769},{"id":"https://openalex.org/keywords/speech-corpus","display_name":"Speech corpus","score":0.4914013147354126},{"id":"https://openalex.org/keywords/reading","display_name":"Reading (process)","score":0.48955902457237244},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.47283607721328735},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4297233521938324},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4278007745742798},{"id":"https://openalex.org/keywords/speech-analytics","display_name":"Speech analytics","score":0.42772117257118225},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3513195514678955},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.32732510566711426},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.12230002880096436}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7699724435806274},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.530102014541626},{"id":"https://openalex.org/C2908647359","wikidata":"https://www.wikidata.org/wiki/Q2625603","display_name":"Population","level":2,"score":0.527032196521759},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.5167995691299438},{"id":"https://openalex.org/C106251023","wikidata":"https://www.wikidata.org/wiki/Q851989","display_name":"Porting","level":3,"score":0.5167855024337769},{"id":"https://openalex.org/C91863865","wikidata":"https://www.wikidata.org/wiki/Q4349497","display_name":"Speech corpus","level":3,"score":0.4914013147354126},{"id":"https://openalex.org/C554936623","wikidata":"https://www.wikidata.org/wiki/Q199657","display_name":"Reading (process)","level":2,"score":0.48955902457237244},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.47283607721328735},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4297233521938324},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4278007745742798},{"id":"https://openalex.org/C54953205","wikidata":"https://www.wikidata.org/wiki/Q4142201","display_name":"Speech analytics","level":4,"score":0.42772117257118225},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3513195514678955},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.32732510566711426},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.12230002880096436},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C149923435","wikidata":"https://www.wikidata.org/wiki/Q37732","display_name":"Demography","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/eurospeech.1997-483","is_oa":false,"landing_page_url":"https://doi.org/10.21437/eurospeech.1997-483","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"5th European Conference on Speech Communication and Technology (Eurospeech 1997)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.4300000071525574,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":4,"referenced_works":["https://openalex.org/W184563473","https://openalex.org/W2120316500","https://openalex.org/W2461763846","https://openalex.org/W3037924790"],"related_works":["https://openalex.org/W231741463","https://openalex.org/W2036933852","https://openalex.org/W2550171623","https://openalex.org/W2184371793","https://openalex.org/W2014684632","https://openalex.org/W2105439218","https://openalex.org/W4388404911","https://openalex.org/W168921769","https://openalex.org/W2147998355","https://openalex.org/W2066051122"],"abstract_inverted_index":{"As":[0,146,261],"speech":[1,39,100,107,116,147,208,218,242,266,416,437,490,533,617],"recognition":[2,148],"systems":[3,149],"become":[4,150],"more":[5,11,151,157,216,443],"accurate,":[6,152],"they":[7,153,430,692],"are":[8,154,250,502,634,653,665,693],"used":[9,21],"for":[10,26,31,73,252,264,370,409,584,637],"diverse":[12,102,158],"applications.":[13,159],"These":[14],"applications":[15,402],"often":[16],"involve":[17],"populations":[18,61,178,190,318,335,348,472,681],"who":[19],"never":[20],"a":[22,197,210,220,225,234,293,386,476,497,539,548,554,560,571,577],"recogniser":[23],"before":[24],"and":[25,53,62,82,95,130,291,352,397,517,557,606,623,697],"whom":[27],"the":[28,51,78,83,106,115,119,134,138,172,262,281,288,301,311,339,350,365,395,446,462,528,614,687],"standard":[29],"data":[30,56,71,243,263,368,390,399,417,463,491,512],"adult":[32,34,38,259,435],"male,":[33],"female,":[35],"or":[36,413,564],"mixed":[37],"is":[40,111,237,298,524,586,611,618,621],"not":[41,206,358],"very":[42,112],"representative.":[43],"This":[44,536],"paper":[45],"will":[46,336,383],"deal":[47,344],"with":[48,70,345,566,603,644,658,702],"issues":[49,90,282,306,325,355],"concerning":[50],"collection":[52],"processing":[54,374],"of":[55,65,99,108,136,167,179,186,194,229,241,245,248,280,295,319,367,379,394,403,465,473,478,514,530,551,562,574,579,679],"from":[57,63,101,114,176,209,224,332,496],"those":[58],"new":[59,235,276,317,334,387,555,680],"speaker":[60,103],"speakers":[64,180,230,255,636,655],"different":[66,113],"languages.":[67,670],"It":[68,87,620],"deals":[69],"collected":[72,117,251],"various":[74],"projects,":[75],"such":[76,308,321],"as":[77,309,322,488,492],"KIDS":[79,447],"database":[80,448],"[1]":[81],"Diplomat":[84,600],"project":[85,522],"[2].":[86],"specifically":[88],"discusses":[89],"related":[91],"to":[92,156,215,343,389,433,441,460,486,510,526,570,587,594,613,626,646,660],"obtaining":[93,287],"quantitatively":[94],"qualitatively":[96],"sufficient":[97],"amounts":[98,185,193,240,366],"populations.":[104],"Since":[105,452,608],"these":[109],"individuals":[110],"in":[118,164,171,196,219,283,341,349,353,364,484,508,668,686],"past,":[120],"we":[121,382,700],"assume":[122],"that":[123,246,297,356,405,543],"some":[124],"hand":[125],"labelling":[126,139],"may":[127],"be":[128],"necessary":[129],"therefore":[131],"also":[132,384],"address":[133,377],"issue":[135],"ameliorating":[137],"process.":[140],"1.":[141],"ADAPTATON":[142],"TO":[143],"NEW":[144,672],"APPLICATIONS":[145],"ported":[155],"Changing":[160],"domains":[161],"involves":[162],"changes":[163],"many":[165],"levels":[166],"processing.":[168],"Data":[169],"obtained":[170],"past":[173],"has":[174,269,274,601],"varied":[175],"large":[177,239],"carefully":[181],"reading":[182,191,500,518],"relatively":[183,211],"small":[184,212,227],"text":[187,195,466],"(TIMIT),":[188],"smaller":[189],"larger":[192],"defined":[198],"application":[199,236],"domain":[200,223],"(DARPA":[201],"RM),":[202],"heavily":[203],"constrained,":[204],"but":[205],"read,":[207,461],"population":[213,477],"(ATIS)":[214],"spontaneous":[217,629],"less":[221,624],"restrained":[222],"fairly":[226],"number":[228],"(Broadcast":[231],"News).":[232],"When":[233],"defined,":[238],"typical":[244],"type":[247],"variability":[249],"training.":[253],"The":[254,329,362,400,520],"have":[256,326,357,407,411,431],"generally":[257],"been":[258,360],"natives.":[260],"automatic":[265],"recognizers":[267],"(ASRs)":[268],"changed,":[270],"each":[271],"newly-defined":[272],"hurdle":[273],"revealed":[275],"datagathering":[277],"issues.":[278],"Some":[279],"Broadcast":[284],"News":[285],"concerned":[286],"broadcast":[289],"signal":[290,302,312],"choosing":[292],"subset":[294],"all":[296],"broadcast.":[299],"Once":[300],"was":[303,482],"recorded,":[304],"other":[305,324,347],"surfaced,":[307],"segmenting":[310],"into":[313],"usable":[314],"chunks.":[315],"With":[316],"users,":[320],"children,":[323],"come":[327],"up.":[328],"information":[330],"drawn":[331],"our":[333,677],"hopefully":[337],"aid":[338],"reader":[340],"preparing":[342],"yet":[346,359],"future,":[351],"anticipating":[354],"encountered.":[361],"increase":[363],"needed":[369],"training":[371],"requires":[372],"better":[373],"methodologies.":[375],"To":[376],"part":[378],"this":[380],"issue,":[381],"discuss":[385],"approach":[388],"labelling.":[391],"1.1.":[392],"Description":[393],"projects":[396],"their":[398,419],"few":[401],"ASRs":[404],"presently":[406],"children":[408,458,495],"users":[410],"little":[412],"no":[414,694],"children\u2019s":[415],"at":[418,425,456,547],"disposal.":[420],"Instead,":[421],"like":[422],"Project":[423,453],"LISTEN":[424,454],"Carnegie":[426],"Mellon":[427],"University":[428],"[3],":[429],"had":[432],"use":[434,583],"female":[436],"models.":[438],"In":[439],"order":[440,485,509],"furnish":[442],"appropriate":[444],"data,":[445],"recorded":[449,483,506],"76":[450],"children.":[451],"aims":[455],"helping":[457],"learn":[459],"consists":[464],"read":[467,616],"aloud.":[468],"There":[469],"were":[470,505],"2":[471],"speakers.":[474],"First,":[475],"good":[479,572],"readers":[480],"(SUM95)":[481],"obtain":[487],"much":[489],"possible.":[493],"Then,":[494],"school":[498],"where":[499],"scores":[501],"especially":[503],"low":[504],"(FP)":[507],"get":[511],"representative":[513],"local":[515,596],"dialect":[516],"hesitations.":[519],"DIPLOMAT":[521,585],"[4]":[523],"designed":[525],"test":[527],"feasibility":[529],"rapid-deployment,":[531],"wearable":[532],"translation":[534,541],"systems.":[535],"means":[537],"developing":[538],"machine":[540],"system":[542],"performs":[544],"initial":[545],"translations":[546],"useful":[549],"level":[550,573],"quality":[552,575],"between":[553],"language":[556],"English":[558],"within":[559],"matter":[561],"days":[563],"weeks,":[565],"continual,":[567],"graceful":[568],"improvement":[569],"over":[576],"period":[578],"months.":[580],"A":[581],"potential":[582],"allow":[588],"English-speaking":[589],"soldiers":[590],"on":[591],"peacekeeping":[592],"missions":[593],"interview":[595],"residents.":[597],"So":[598],"far,":[599],"worked":[602],"Serbo-Croatian,":[604],"Creole,":[605],"Korean.":[607],"rapid":[609],"deployment":[610],"central":[612],"project,":[615],"used.":[619],"faster":[622],"labor-intensive":[625],"develop":[627],"than":[628],"speech.":[630],"At":[631],"present,":[632],"there":[633,652],"13":[635],"Haitian":[638],"Creole":[639],"(hereafter,":[640],"Creole)":[641],"(10m,":[642],"3f)":[643,657],"99":[645],"231":[647],"sentences":[648,662],"each.":[649,663],"For":[650],"Korean":[651],"8":[654],"(5m.,":[656],"118":[659],"180":[661],"Recordings":[664],"still":[666],"underway":[667],"both":[669],"2.":[671],"SPEAKER":[673],"POPULATIONS":[674],"We":[675,689],"group":[676],"observations":[678],"according":[682],"assumptions":[683],"researchers":[684],"made":[685],"past.":[688],"examine":[690],"how":[691,699],"longer":[695],"valid,":[696],"note":[698],"dealt":[701],"them.":[703]},"counts_by_year":[{"year":2016,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
