<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<url>
<loc>https://inferenceengineering.tech/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>1</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/chapters/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.9</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/exercises/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/benchmarks/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/learn/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.9</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/paths/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/cheat-sheets/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/guides/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/reading/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/glossary/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/free-pdf/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/about/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/chapters/preface/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.9</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/chapters/inference/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.9</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/chapters/prerequisites/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.9</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/chapters/models/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.9</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/chapters/hardware/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.9</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/chapters/software/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.9</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/chapters/techniques/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.9</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/chapters/modalities/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.9</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/chapters/production/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.9</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/exercises/vram-calculator/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/exercises/arithmetic-intensity/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/exercises/kv-cache-sizing/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/exercises/quantization-estimator/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/exercises/speculative-decoding/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/exercises/gpu-advisor/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/exercises/hardware-recommender/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/learn/vllm-vs-sglang-vs-tensorrt-llm/</loc>
<lastmod>2026-06-01T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/learn/gpu-inference/</loc>
<lastmod>2026-06-01T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/learn/ai-inference-hardware/</loc>
<lastmod>2026-06-01T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/learn/llm-inference-acceleration/</loc>
<lastmod>2026-06-01T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/paths/getting-started/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/paths/architect/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/paths/optimizer/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/guides/serving-framework/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/cheat-sheets/inference/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/cheat-sheets/prerequisites/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/cheat-sheets/models/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/cheat-sheets/hardware/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/cheat-sheets/software/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/cheat-sheets/techniques/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/cheat-sheets/modalities/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/cheat-sheets/production/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/glossary/kv-cache/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/glossary/paged-attention/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/glossary/multi-head-attention/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/glossary/grouped-query-attention/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/glossary/multi-query-attention/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/glossary/flash-attention/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/glossary/mixture-of-experts/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/glossary/context-window/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/glossary/embedding/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/glossary/tokenization/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/glossary/logits/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/glossary/quantization/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/glossary/int8/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/glossary/fp8/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/glossary/awq/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/glossary/gptq/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/glossary/smoothquant/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/glossary/speculative-decoding/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/glossary/draft-model/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/glossary/eagle/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/glossary/medusa/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/glossary/prefix-caching/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/glossary/radix-attention/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/glossary/chunked-prefill/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/glossary/disaggregated-inference/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/glossary/continuous-batching/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/glossary/vllm/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/glossary/sglang/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/glossary/tensorrt-llm/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/glossary/llama-cpp/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/glossary/text-generation-inference/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/glossary/hbm/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/glossary/vram/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/glossary/memory-bandwidth/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/glossary/flops/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/glossary/tensor-core/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/glossary/nvlink/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/glossary/arithmetic-intensity/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/glossary/roofline-model/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/glossary/tensor-parallelism/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/glossary/pipeline-parallelism/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/glossary/expert-parallelism/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/glossary/prefill-phase/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/glossary/decode-phase/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/glossary/ttft/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/glossary/tps/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/glossary/throughput/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/glossary/latency/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/glossary/slo/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/glossary/sampling/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/glossary/temperature/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/glossary/beam-search/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/glossary/activation/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>https://inferenceengineering.tech/glossary/weights/</loc>
<lastmod>2026-06-14T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
</urlset>
