Local LLM rig under $1k (2026)

A $1k rig won't run a 70B model at production speed. It will run 14B-class models in Q8 with full context, or 32B in Q4 — fast enough for daily dev work, no cloud bill.

Job-to-be-done · Run 14-32B models at home on a budget rig — small models, full context.

/**
 * Minified content module for the "Local LLM rig" guide.
 *
 * NOTE(review): this appears to be generated output (compiled MDX, judging by
 * the `_createMdxContent` name) - prefer editing the upstream source rather
 * than this text; confirm where it is generated.
 *
 * Evaluation model: the code reads `arguments[0]` and uses a top-level
 * `return`, so it is only valid when evaluated as a function body, not when
 * loaded as a script or ES module. `arguments[0]` must provide `Fragment`,
 * `jsx` and `jsxs`, which are aliased to `e`, `n` and `t`.
 *
 * _createMdxContent(r):
 *   Builds the tag map `i` (h2, li, p, strong, table, tbody, td, th, thead,
 *   tr, ul), each defaulting to its own tag name; `r.components` is spread
 *   last, so caller-supplied entries override the defaults. `r.components` is
 *   not used for anything else here. Returns a Fragment whose children are the
 *   guide's sections ("The job", "The build", "Numbers", "Tradeoffs",
 *   "What this doesn't get you"), separated by literal "\n" string children.
 *
 * Returned value: `{ default }`, where `default(e = {})` returns
 *   `_createMdxContent(e)` directly, or - when `e.components.wrapper` is
 *   truthy - a `wrapper` element that receives all of `e` as props plus the
 *   content element as `children`. Inside `default`, the parameter `e` and the
 *   local `t` (the wrapper) shadow the outer `e` (Fragment) and `t` (jsxs);
 *   only `n` (jsx) still refers to the runtime there.
 *
 * NOTE(review): two string literals (the Storage table row and the
 * "RTX 5070 (non-Ti)" bullet) continue across a physical line break in this
 * file. A raw newline inside a double-quoted literal is not valid JavaScript,
 * so the break is presumably a different character (e.g. U+2028) or an
 * extraction artifact - confirm before hand-editing those strings.
 */
const{Fragment:e,jsx:n,jsxs:t}=arguments[0];function _createMdxContent(r){const i={h2:"h2",li:"li",p:"p",strong:"strong",table:"table",tbody:"tbody",td:"td",th:"th",thead:"thead",tr:"tr",ul:"ul",...r.components};return t(e,{children:[n(i.h2,{children:"The job"}),"\n",n(i.p,{children:"You have $1,000 and you want a local LLM rig that runs 14-32B models at usable speed, with enough context to actually work — not toy demos. You'd rather own the hardware than rent tokens. You accept that a 70B model at this budget is a stretch goal, not the daily driver."}),"\n",n(i.p,{children:"This guide is not for you if:"}),"\n",t(i.ul,{children:["\n",n(i.li,{children:"You need to fine-tune anything past a small LoRA. Buy more VRAM."}),"\n",n(i.li,{children:"You want 70B inference at coding-assistant latency. That's a $4k floor."}),"\n",n(i.li,{children:"You're doing image or video generation as the primary load. Different math."}),"\n"]}),"\n",n(i.h2,{children:"The build"}),"\n",t(i.table,{children:[n(i.thead,{children:t(i.tr,{children:[n(i.th,{children:"Part"}),n(i.th,{children:"Pick"}),n(i.th,{children:"Why"})]})}),t(i.tbody,{children:[t(i.tr,{children:[n(i.td,{children:"GPU"}),n(i.td,{children:"NVIDIA RTX 5070 Ti (16 GB)"}),n(i.td,{children:"$749 MSRP, GDDR7, 256-bit bus, current driver and CUDA support, full warranty."})]}),t(i.tr,{children:[n(i.td,{children:"CPU"}),n(i.td,{children:"AMD Ryzen 5 7600"}),n(i.td,{children:"Six cores is plenty when the GPU does the work. Frees ~$150 vs the 7700X for storage and PSU."})]}),t(i.tr,{children:[n(i.td,{children:"RAM"}),n(i.td,{children:"32 GB DDR5-6000 (2x16 GB)"}),n(i.td,{children:"Two-stick kit hits 6,000 MT/s on AM5 without drama. 32 GB is the floor for spilling layers and running a browser."})]}),t(i.tr,{children:[n(i.td,{children:"Storage"}),n(i.td,{children:"1 TB NVMe Gen 4 (WD SN770 / Crucial P3 Plus)"}),n(i.td,{children:"Models are big. A 70B Q4 file is roughly 40 GB. 
Gen 4 is the cheap-fast tier; Gen 5 is wasted here."})]}),t(i.tr,{children:[n(i.td,{children:"PSU"}),n(i.td,{children:"750 W 80+ Gold (Corsair RM750e)"}),n(i.td,{children:"Vendor-recommended floor for the 5070 Ti's 300 W TGP. Don't cheap out — bad PSUs cost GPUs."})]}),t(i.tr,{children:[n(i.td,{children:"Case"}),n(i.td,{children:"Mid-tower with mesh front"}),n(i.td,{children:"The 5070 Ti dumps 300 W as heat. Airflow over looks."})]}),t(i.tr,{children:[n(i.td,{children:"OS"}),n(i.td,{children:"Ubuntu 24.04 LTS or Windows 11"}),n(i.td,{children:"Linux for fewer driver fights with vLLM/llama.cpp; Windows if you need it for other reasons."})]})]})]}),"\n",n(i.p,{children:"Total lands at roughly $1,050-1,100 with current street prices. Trim the case or shop a 7600 sale to crack $1k flat."}),"\n",n(i.h2,{children:"Numbers"}),"\n",t(i.ul,{children:["\n",t(i.li,{children:[n(i.strong,{children:"14B-class at Q8"})," — ~35-50 tok/s in llama.cpp, full 32k context fits in 16 GB."]}),"\n",t(i.li,{children:[n(i.strong,{children:"32B-class at Q4_K_M"})," — ~18-25 tok/s, 8-16k context comfortable."]}),"\n",t(i.li,{children:[n(i.strong,{children:"70B-class at IQ2_XXS"})," — runs, ~5-8 tok/s, quality drops noticeably. Demo only."]}),"\n",t(i.li,{children:[n(i.strong,{children:"Idle draw"})," — ~60 W at the wall. Reasonable to leave on."]}),"\n"]}),"\n",n(i.h2,{children:"Tradeoffs"}),"\n",t(i.ul,{children:["\n",t(i.li,{children:[n(i.strong,{children:"Used RTX 3090 (24 GB)"}),": street price $700-900. Eight GB more VRAM than the 5070 Ti, which lets a 32B Q5 fit and gives 70B IQ3 a real shot. You give up warranty, lose the GDDR7 bandwidth, and inherit whatever a stranger did to the card. Worth it if you're patient and know how to inspect a used GPU."]}),"\n",t(i.li,{children:[n(i.strong,{children:"RTX 5070 (non-Ti)"}),": $549 MSRP, 12 GB VRAM. Saves $200 but the 12 GB ceiling pushes 14B models into Q4 territory and rules out 32B at any sane quant. 
Skip unless the budget is hard."]}),"\n",t(i.li,{children:[n(i.strong,{children:"Apple M4 Mac mini base"}),": $599, 16 GB unified. Quiet, efficient, sips power. Tok/s on a 14B model is roughly half the 5070 Ti and the toolchain (MLX, llama.cpp Metal) lags CUDA on day-one model support. A nice second machine, not a primary rig."]}),"\n"]}),"\n",n(i.h2,{children:"What this doesn't get you"}),"\n",t(i.ul,{children:["\n",n(i.li,{children:"70B at production speed. That's the next budget tier."}),"\n",n(i.li,{children:"Multi-GPU. One PCIe slot, one card, one PSU rail."}),"\n",n(i.li,{children:"A fine-tuning station. Inference rig only."}),"\n",n(i.li,{children:"Headroom for the next generation of frontier-quality open weights without a quant compromise."}),"\n"]})]})}return{default:function(e={}){const{wrapper:t}=e.components||{};return t?n(t,{...e,children:n(_createMdxContent,{...e})}):_createMdxContent(e)}};