{"id":"https://openalex.org/W4383860292","doi":"https://doi.org/10.48550/arxiv.2307.03322","title":"BiPhone: Modeling Inter Language Phonetic Influences in Text","display_name":"BiPhone: Modeling Inter Language Phonetic Influences in Text","publication_year":2023,"publication_date":"2023-07-06","ids":{"openalex":"https://openalex.org/W4383860292","doi":"https://doi.org/10.48550/arxiv.2307.03322"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2307.03322","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2307.03322","pdf_url":"https://arxiv.org/pdf/2307.03322","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2307.03322","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5035742926","display_name":"Abhirut Gupta","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Gupta, Abhirut","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087678154","display_name":"Ananya B. Sai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sai, Ananya B.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063612469","display_name":"Richard Sproat","orcid":"https://orcid.org/0000-0002-9040-5196"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sproat, Richard","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010564768","display_name":"Yuri Vasilevski","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Vasilevski, Yuri","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101222530","display_name":"James S. Ren","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ren, James S.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043510378","display_name":"Ambarish Jash","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jash, Ambarish","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069902135","display_name":"Sukhdeep Sodhi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sodhi, Sukhdeep S.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5084091166","display_name":"Aravindan Raghuveer","orcid":"https://orcid.org/0000-0001-5006-4385"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Raghuveer, Aravindan","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5035742926"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9951000213623047,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.8185229301452637},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7921886444091797},{"id":"https://openalex.org/keywords/phone","display_name":"Phone","score":0.6503065824508667},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.6355739235877991},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5658161044120789},{"id":"https://openalex.org/keywords/byte","display_name":"Byte","score":0.559569776058197},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.494130402803421},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4829908609390259},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.43585628271102905},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.41238778829574585},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.328876256942749}],"concepts":[{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.8185229301452637},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7921886444091797},{"id":"https://openalex.org/C2778707766","wikidata":"https://www.wikidata.org/wiki/Q202064","display_name":"Phone","level":2,"score":0.6503065824508667},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.6355739235877991},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5658161044120789},{"id":"https://openalex.org/C43364308","wikidata":"https://www.wikidata.org/wiki/Q8799","display_name":"Byte","level":2,"score":0.559569776058197},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.494130402803421},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4829908609390259},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.43585628271102905},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.41238778829574585},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.328876256942749},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2307.03322","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2307.03322","pdf_url":"https://arxiv.org/pdf/2307.03322","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2307.03322","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2307.03322","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2307.03322","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2307.03322","pdf_url":"https://arxiv.org/pdf/2307.03322","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"score":0.8700000047683716,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4383860292.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2378211422","https://openalex.org/W2745001401","https://openalex.org/W4321353415","https://openalex.org/W2130974462","https://openalex.org/W2028665553","https://openalex.org/W2086519370","https://openalex.org/W972276598","https://openalex.org/W2087343574","https://openalex.org/W4246352526","https://openalex.org/W2121910908"],"abstract_inverted_index":{"A":[0],"large":[1,36],"number":[2,37],"of":[3,38,69,175],"people":[4],"are":[5,41,75],"forced":[6],"to":[7,20,52,65,150,154,163,183],"use":[8],"the":[9,26,109,114,160,173,180],"Web":[10],"in":[11,18,25,57,167,187],"a":[12,35,50,79,140],"language":[13,28,46,116,132,170],"they":[14],"have":[15,105],"low":[16],"literacy":[17],"due":[19],"technology":[21],"asymmetries.":[22],"Written":[23],"text":[24],"second":[27],"(L2)":[29],"from":[30],"such":[31],"users":[32],"often":[33],"contains":[34],"errors":[39],"that":[40,59,94,99,130],"influenced":[42],"by":[43],"their":[44],"native":[45],"(L1).":[47],"We":[48,111,137],"propose":[49],"method":[51],"mine":[53],"phoneme":[54,142],"confusions":[55,74],"(sounds":[56],"L2":[58,87],"an":[60],"L1":[61,70],"speaker":[62],"is":[63,179],"likely":[64],"conflate)":[66],"for":[67,83,124],"pairs":[68],"and":[71,103,128],"L2.":[72],"These":[73],"then":[76],"plugged":[77],"into":[78],"generative":[80],"model":[81],"(Bi-Phone)":[82],"synthetically":[84],"producing":[85],"corrupted":[86],"text.":[88,188],"Through":[89],"human":[90],"evaluations,":[91],"we":[92,157],"show":[93,129],"Bi-Phone":[95],"generates":[96],"plausible":[97],"corruptions":[98],"differ":[100],"across":[101],"L1s":[102],"also":[104,112,138,158],"widespread":[106],"coverage":[107],"on":[108],"Web.":[110],"corrupt":[113],"popular":[115],"understanding":[117],"benchmark":[118,162,182],"SuperGLUE":[119],"with":[120],"our":[121,176],"technique":[122],"(FunGLUE":[123],"Phonetically":[125],"Noised":[126],"GLUE)":[127],"SoTA":[131],"understating":[133],"models":[134,149],"perform":[135],"poorly.":[136],"introduce":[139,184],"new":[141],"prediction":[143],"pre-training":[144],"task":[145],"which":[146],"helps":[147],"byte":[148],"recover":[151],"performance":[152],"close":[153],"SuperGLUE.":[155],"Finally,":[156],"release":[159],"FunGLUE":[161,178],"promote":[164],"further":[165],"research":[166],"phonetically":[168],"robust":[169],"models.":[171],"To":[172],"best":[174],"knowledge,":[177],"first":[181],"L1-L2":[185],"interactions":[186]},"counts_by_year":[],"updated_date":"2026-03-13T16:22:10.518609","created_date":"2025-10-10T00:00:00"}
