Serious local LLM workstation under $10k (2026)

Two RTX 5090s, a Threadripper for PCIe lanes, and 128 GB of DDR5. 64 GB of aggregate VRAM at the lowest cost-per-GB on this tier — and it sits on your desk.

Job-to-be-done · Run 70B+ models comfortably, multi-GPU agentic workflows, or LoRA fine-tunes — all locally.

const{Fragment:e,jsx:n,jsxs:t}=arguments[0];function _createMdxContent(r){const i={h2:"h2",li:"li",p:"p",strong:"strong",table:"table",tbody:"tbody",td:"td",th:"th",thead:"thead",tr:"tr",ul:"ul",...r.components};return t(e,{children:[n(i.h2,{children:"The job"}),"\n",n(i.p,{children:"You want to run 70B-class models at home with room to breathe. A 70B at 4-bit fits in roughly 40 GB of VRAM with a 32k context; at 8-bit it wants closer to 75 GB once you load KV cache. You also want headroom for agentic workflows that pin two or three smaller models in memory at once, or LoRA fine-tunes on mid-scale bases. You're not chasing datacenter throughput. You're not pretraining. You want a single tower that earns its keep when the cloud bill stops being a rounding error."}),"\n",n(i.p,{children:"This guide is not for you if:"}),"\n",t(i.ul,{children:["\n",n(i.li,{children:"You need 405B-class models. Multi-node problem, different blueprint."}),"\n",n(i.li,{children:"You want pretraining capacity. Different scale of compute entirely."}),"\n",n(i.li,{children:"You need a quiet office. Two 5090s are loud."}),"\n"]}),"\n",n(i.h2,{children:"The build"}),"\n",t(i.table,{children:[n(i.thead,{children:t(i.tr,{children:[n(i.th,{children:"Part"}),n(i.th,{children:"Pick"}),n(i.th,{children:"Why"})]})}),t(i.tbody,{children:[t(i.tr,{children:[n(i.td,{children:"GPU"}),n(i.td,{children:"2x NVIDIA RTX 5090 (32 GB each)"}),n(i.td,{children:"64 GB aggregate VRAM at $4,000. 1,792 GB/s per card. Tensor-split is mature in vLLM and llama.cpp."})]}),t(i.tr,{children:[n(i.td,{children:"CPU"}),n(i.td,{children:"AMD Threadripper 7970X (32-core)"}),n(i.td,{children:"48 PCIe 5.0 lanes feed two x16 GPU slots without bifurcation tricks. ~$2,500 street."})]}),t(i.tr,{children:[n(i.td,{children:"RAM"}),n(i.td,{children:"128 GB DDR5-5600 ECC RDIMM (4x32)"}),n(i.td,{children:"Quad-channel matches the platform. ECC because long fine-tune runs deserve it."})]}),t(i.tr,{children:[n(i.td,{children:"Storage"}),n(i.td,{children:"2 TB Samsung 990 Pro NVMe + 4 TB secondary"}),n(i.td,{children:"Hot models on the fast drive; weights, datasets, checkpoints on the bulk."})]}),t(i.tr,{children:[n(i.td,{children:"PSU"}),n(i.td,{children:"Corsair AX1600i (1600 W, Titanium)"}),n(i.td,{children:"Two 5090s pull a peak 1,150 W under transient spikes. 1500 W is the floor; 1600 W with margin is the answer."})]}),t(i.tr,{children:[n(i.td,{children:"Case"}),n(i.td,{children:"Fractal Define 7 XL or Phanteks Enthoo Pro 2"}),n(i.td,{children:"E-ATX, 8+ slots, airflow for 1,150 W of GPU heat. Two 5090s need real space."})]}),t(i.tr,{children:[n(i.td,{children:"OS"}),n(i.td,{children:"Ubuntu 24.04 LTS"}),n(i.td,{children:"CUDA 13 lands cleanly. NVIDIA's open driver is the default for Blackwell. WSL2 if Windows is non-negotiable."})]})]})]}),"\n",n(i.h2,{children:"Numbers"}),"\n",t(i.ul,{children:["\n",t(i.li,{children:[n(i.strong,{children:"70B at 4-bit, 32k context"})," — ~30-45 tok/s with tensor-parallel-2 in vLLM. Both cards loaded ~80%."]}),"\n",t(i.li,{children:[n(i.strong,{children:"70B at 8-bit, 32k context"})," — ~18-25 tok/s. KV cache fits across the two cards."]}),"\n",t(i.li,{children:[n(i.strong,{children:"123B-class at 4-bit"})," — runs at 64 GB but tight; expect ~12-18 tok/s and no room for long context."]}),"\n",t(i.li,{children:[n(i.strong,{children:"LoRA on a 13B base"})," — comfortable headroom, batch size 4-8 depending on sequence length."]}),"\n"]}),"\n",n(i.h2,{children:"Tradeoffs"}),"\n",t(i.ul,{children:["\n",t(i.li,{children:[n(i.strong,{children:"DGX Spark instead"})," — 128 GB of unified LPDDR5X at 273 GB/s for $4,699. Twice the addressable memory but a sixth of the bandwidth per byte you actually move. Wins on the largest models that won't fit in 64 GB; loses on token throughput for everything that does."]}),"\n",t(i.li,{children:[n(i.strong,{children:"Single RTX PRO 6000 Blackwell"})," — 96 GB GDDR7 ECC on one card, 1,792 GB/s, ECC. No multi-GPU plumbing. Partner pricing typically $7,000-$9,000, which leaves a thin budget for the rest of the rig. Right answer if you hate tensor-parallel debugging or need ECC VRAM specifically."]}),"\n",t(i.li,{children:[n(i.strong,{children:"Cloud H200/B200 instances"})," — burstable for occasional 405B work. Math flips against you above ~200 hours of monthly use, and the latency is never local."]}),"\n"]}),"\n",n(i.h2,{children:"What this doesn't get you"}),"\n",t(i.ul,{children:["\n",n(i.li,{children:"405B-class models at full precision. That's a multi-node problem."}),"\n",n(i.li,{children:"Pretraining anything serious. Different scale of compute."}),"\n",n(i.li,{children:"Quiet operation. Two 5090s under load are loud, and 1,150 W of heat has to go somewhere."}),"\n",n(i.li,{children:"A path to NVLink. Consumer Blackwell skipped it; the cards talk over PCIe 5.0."}),"\n"]})]})}return{default:function(e={}){const{wrapper:t}=e.components||{};return t?n(t,{...e,children:n(_createMdxContent,{...e})}):_createMdxContent(e)}};