Image and video generation rig under $4k (2026)

Single RTX 5090, 64 GB DDR5, 4 TB NVMe. Built for sustained image and short-video runs — Flux, SDXL, Wan 2.2 — without the cloud bill or the queue.

Job-to-be-done · Generate images and short videos locally — Flux, SDXL, Wan 2.2 — at production iteration speeds.

const{Fragment:e,jsx:n,jsxs:i}=arguments[0];function _createMdxContent(t){const r={h2:"h2",li:"li",p:"p",strong:"strong",table:"table",tbody:"tbody",td:"td",th:"th",thead:"thead",tr:"tr",ul:"ul",...t.components};return i(e,{children:[n(r.h2,{children:"The job"}),"\n",n(r.p,{children:"You generate images and short videos locally. Flux for stills you actually\nship. SDXL when speed matters more than fidelity. Wan 2.2 when the brief\ncalls for motion. You iterate in dozens-to-hundreds per session, not ones,\nand you're tired of waiting on a shared cloud queue at 9pm. You have ~$4k\nand you'd like to spend it once."}),"\n",n(r.p,{children:"The shape of this workload is different from an LLM rig. VRAM ceilings are\nsofter — Flux fits in 24 GB, SDXL fits in 12 GB, Wan 2.2 scales with what\nyou give it. What hurts is everything around the model: the checkpoint\nstack, the LoRA library, the VAE intermediates, the sustained 100% GPU\ndraw across a multi-hour session."}),"\n",n(r.p,{children:"This guide is not for you if:"}),"\n",i(r.ul,{children:["\n",n(r.li,{children:"LLM inference is the primary load. Different math, different rig."}),"\n",n(r.li,{children:"You need real-time video. Wan 2.2 is minutes per clip."}),"\n",n(r.li,{children:"You're training a foundation model. This is an inference + small-LoRA box."}),"\n"]}),"\n",n(r.h2,{children:"The build"}),"\n",i(r.table,{children:[n(r.thead,{children:i(r.tr,{children:[n(r.th,{children:"Part"}),n(r.th,{children:"Pick"}),n(r.th,{children:"Why"})]})}),i(r.tbody,{children:[i(r.tr,{children:[n(r.td,{children:"GPU"}),n(r.td,{children:"NVIDIA RTX 5090 (32 GB)"}),n(r.td,{children:"32 GB GDDR7, 1,792 GB/s memory bandwidth. Flux + LoRA + ControlNet stack fits with room to spare."})]}),i(r.tr,{children:[n(r.td,{children:"CPU"}),n(r.td,{children:"AMD Ryzen 9 9950X"}),n(r.td,{children:"16 cores soak VAE decode, image preprocessing, and ffmpeg encode without choking the GPU pipeline."})]}),i(r.tr,{children:[n(r.td,{children:"RAM"}),n(r.td,{children:"64 GB DDR5-6000 (2x32)"}),n(r.td,{children:"VAE tiles, model swaps, and Wan 2.2 intermediates spill into system RAM. 32 GB runs out the moment you queue a batch."})]}),i(r.tr,{children:[n(r.td,{children:"Storage"}),n(r.td,{children:"4 TB Samsung 990 Pro NVMe (Gen 4)"}),n(r.td,{children:"A working checkpoint + LoRA library is 1-2 TB before you notice. Cold-loading models from a slow disk wastes session time."})]}),i(r.tr,{children:[n(r.td,{children:"PSU"}),n(r.td,{children:"1000 W 80+ Gold"}),n(r.td,{children:"RTX 5090 draws 575 W TGP. Headroom for transient spikes and a sustained-load duty cycle."})]}),i(r.tr,{children:[n(r.td,{children:"Case"}),n(r.td,{children:"Fractal Define 7 / Lian Li O11D EVO"}),n(r.td,{children:"Three intake fans minimum. Sustained compute is the workload — burst-tuned cases thermal-throttle by hour two."})]}),i(r.tr,{children:[n(r.td,{children:"OS"}),n(r.td,{children:"Windows 11 + WSL2, or Ubuntu 24.04"}),n(r.td,{children:"ComfyUI, Wan2GP, Forge all run on either. Pick what your toolchain already targets."})]})]})]}),"\n",n(r.p,{children:"Approximate total: $3,800. GPU is $1,999 of that."}),"\n",n(r.h2,{children:"Numbers"}),"\n",i(r.ul,{children:["\n",i(r.li,{children:[n(r.strong,{children:"SDXL 1024x1024"})," — 4-7 sec per image."]}),"\n",i(r.li,{children:[n(r.strong,{children:"Flux 1.dev 1024x1024"})," — 12-20 sec per image."]}),"\n",i(r.li,{children:[n(r.strong,{children:"Wan 2.2 short clip"})," — minutes per clip; varies wildly with length, resolution, and steps."]}),"\n",i(r.li,{children:[n(r.strong,{children:"SDXL character LoRA training"})," — under an hour on a small dataset."]}),"\n"]}),"\n",n(r.h2,{children:"Tradeoffs"}),"\n",i(r.ul,{children:["\n",i(r.li,{children:[n(r.strong,{children:"Drop to a 4090 (24 GB), spend the savings on storage."})," You lose the 32 GB Flux-plus-everything-loaded headroom and the GDDR7 bandwidth, but you keep most of the throughput. Reasonable if you found a deal."]}),"\n",i(r.li,{children:[n(r.strong,{children:"Drop the GPU to a 5080 (16 GB)."})," Don't. SDXL is fine; Flux gets tight; Wan 2.2 starts forcing offloads. Rigs you have to fight aren't fun rigs."]}),"\n",i(r.li,{children:[n(r.strong,{children:"Add a second 5090 later."})," ComfyUI parallelizes batch jobs across GPUs cleanly. Leave PSU headroom and a free PCIe slot now if this is the plan."]}),"\n"]}),"\n",n(r.h2,{children:"What this doesn't get you"}),"\n",i(r.ul,{children:["\n",n(r.li,{children:"Real-time video generation. Wan 2.2 is minutes per clip, not frames per second."}),"\n",n(r.li,{children:"Training a foundation model. This is an inference + small-LoRA rig, not an H100 substitute."}),"\n",n(r.li,{children:"A quiet room. 575 W of sustained GPU draw is going to be audible."}),"\n"]})]})}return{default:function(e={}){const{wrapper:i}=e.components||{};return i?n(i,{...e,children:n(_createMdxContent,{...e})}):_createMdxContent(e)}};