{"id":"https://openalex.org/W7148444668","doi":"https://doi.org/10.48550/arxiv.2604.00339","title":"When Career Data Runs Out: Structured Feature Engineering and Signal Limits for Founder Success Prediction","display_name":"When Career Data Runs Out: Structured Feature Engineering and Signal Limits for Founder Success Prediction","publication_year":2026,"publication_date":"2026-04-01","ids":{"openalex":"https://openalex.org/W7148444668","doi":"https://doi.org/10.48550/arxiv.2604.00339"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.00339","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.00339","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.00339","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5132790584","display_name":"Yagiz Ihlamur","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Ihlamur, Yagiz","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5132790584"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10058","display_name":"Entrepreneurship Studies and Influences","score":0.19460000097751617,"subfield":{"id":"https://openalex.org/subfields/1405","display_name":"Management of Technology and Innovation"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10058","display_name":"Entrepreneurship Studies and Influences","score":0.19460000097751617,"subfield":{"id":"https://openalex.org/subfields/1405","display_name":"Management of Technology and Innovation"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11653","display_name":"Financial Distress and Bankruptcy Prediction","score":0.13420000672340393,"subfield":{"id":"https://openalex.org/subfields/1402","display_name":"Accounting"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11903","display_name":"Private Equity and Venture Capital","score":0.08179999887943268,"subfield":{"id":"https://openalex.org/subfields/1402","display_name":"Accounting"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/json","display_name":"JSON","score":0.7958999872207642},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.5795999765396118},{"id":"https://openalex.org/keywords/signal","display_name":"SIGNAL (programming language)","score":0.5424000024795532},{"id":"https://openalex.org/keywords/raw-data","display_name":"Raw data","score":0.5206000208854675},{"id":"https://openalex.org/keywords/lossy-compression","display_name":"Lossy compression","score":0.4984999895095825},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.48080000281333923},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.4523000121116638},{"id":"https://openalex.org/keywords/precision-and-recall","display_name":"Precision and recall","score":0.4251999855041504}],"concepts":[{"id":"https://openalex.org/C2780416260","wikidata":"https://www.wikidata.org/wiki/Q2063","display_name":"JSON","level":2,"score":0.7958999872207642},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6157000064849854},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.5795999765396118},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.5424000024795532},{"id":"https://openalex.org/C132964779","wikidata":"https://www.wikidata.org/wiki/Q2110223","display_name":"Raw data","level":2,"score":0.5206000208854675},{"id":"https://openalex.org/C165021410","wikidata":"https://www.wikidata.org/wiki/Q55564","display_name":"Lossy compression","level":2,"score":0.4984999895095825},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.48080000281333923},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48030000925064087},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.4523000121116638},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.44929999113082886},{"id":"https://openalex.org/C81669768","wikidata":"https://www.wikidata.org/wiki/Q2359161","display_name":"Precision and recall","level":2,"score":0.4251999855041504},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.37209999561309814},{"id":"https://openalex.org/C2778827112","wikidata":"https://www.wikidata.org/wiki/Q22245680","display_name":"Feature engineering","level":3,"score":0.3407999873161316},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.32010000944137573},{"id":"https://openalex.org/C22367795","wikidata":"https://www.wikidata.org/wiki/Q7625208","display_name":"Structured prediction","level":2,"score":0.2980000078678131},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.2937000095844269},{"id":"https://openalex.org/C557433098","wikidata":"https://www.wikidata.org/wiki/Q94","display_name":"Android (operating system)","level":2,"score":0.2897999882698059},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.28619998693466187},{"id":"https://openalex.org/C2777489069","wikidata":"https://www.wikidata.org/wiki/Q1589822","display_name":"Ceiling (cloud)","level":2,"score":0.2827000021934509},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.27379998564720154},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.27079999446868896},{"id":"https://openalex.org/C2779696439","wikidata":"https://www.wikidata.org/wiki/Q7512811","display_name":"Signature (topology)","level":2,"score":0.267300009727478},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.26019999384880066},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2533999979496002},{"id":"https://openalex.org/C45804977","wikidata":"https://www.wikidata.org/wiki/Q7239673","display_name":"Predictive modelling","level":2,"score":0.2533999979496002}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.00339","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.00339","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.00339","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.00339","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Predicting":[0],"startup":[1],"success":[2],"from":[3,36,88,123],"founder":[4],"career":[5],"data":[6],"is":[7,11,118,121,133],"hard.":[8],"The":[9,116,141],"signal":[10,112,164],"weak,":[12],"the":[13,75,89,124,150,163],"labels":[14],"are":[15],"rare":[16],"(9%),":[17],"and":[18,45,53,97,167],"most":[19],"founders":[20],"who":[21,28],"succeed":[22],"look":[23],"almost":[24],"identical":[25],"to":[26,181,188],"those":[27],"fail.":[29],"We":[30,79],"engineer":[31],"28":[32],"structured":[33],"features":[34,87,102],"directly":[35,130,180],"raw":[37],"JSON":[38,126],"fields":[39,127],"--":[40,44,70,131,176],"jobs,":[41],"education,":[42],"exits":[43],"combine":[46],"them":[47],"with":[48],"a":[49,71,82,134,138,157,173,183],"deterministic":[50],"rule":[51],"layer":[52],"XGBoost":[54],"boosted":[55],"stumps.":[56],"Our":[57],"model":[58,106],"achieves":[59],"Val":[60,146],"F0.5":[61],"=":[62,65,68,114],"0.3030,":[63],"Precision":[64],"0.3333,":[66],"Recall":[67],"0.2222":[69],"+17.7pp":[72],"improvement":[73],"over":[74],"zero-shot":[76],"LLM":[77,101],"baseline.":[78],"then":[80],"run":[81],"controlled":[83],"experiment:":[84],"extract":[85],"9":[86],"prose":[90],"field":[91],"using":[92],"Claude":[93],"Haiku,":[94],"at":[95],"67%":[96],"100%":[98],"dataset":[99,185],"coverage.":[100],"capture":[103],"26.4%":[104],"of":[105,153],"importance":[107],"but":[108],"add":[109],"zero":[110],"CV":[111],"(delta":[113],"-0.05pp).":[115],"reason":[117],"structural:":[119],"anonymised_prose":[120],"generated":[122],"same":[125],"we":[128],"parse":[129],"it":[132],"lossy":[135],"re-encoding,":[136],"not":[137,156],"richer":[139,184],"source.":[140],"ceiling":[142],"(CV":[143],"~=":[144,147],"0.25,":[145],"0.30)":[148],"reflects":[149],"information":[151],"content":[152],"this":[154,169],"dataset,":[155],"modeling":[158],"limitation.":[159],"In":[160],"characterizing":[161],"where":[162],"runs":[165],"out":[166],"why,":[168],"work":[170],"functions":[171],"as":[172],"benchmark":[174],"diagnostic":[175],"one":[177],"that":[178],"points":[179],"what":[182],"would":[186],"need":[187],"include.":[189]},"counts_by_year":[],"updated_date":"2026-04-03T16:44:17.987007","created_date":"2026-04-03T00:00:00"}
