{"dataType":"CVE_RECORD","dataVersion":"5.2","cveMetadata":{"cveId":"CVE-2026-53923","assignerOrgId":"a0819718-46f1-4df5-94e2-005712e83aaa","state":"PUBLISHED","assignerShortName":"GitHub_M","dateReserved":"2026-06-11T15:46:12.316Z","datePublished":"2026-06-22T21:55:42.001Z","dateUpdated":"2026-06-23T15:05:21.711Z"},"containers":{"cna":{"title":"vLLM GGUF Kernels: int64_t to int truncation of tensor dimensions causes GPU buffer overflow","problemTypes":[{"descriptions":[{"cweId":"CWE-681","lang":"en","description":"CWE-681: Incorrect Conversion between Numeric Types","type":"CWE"}]},{"descriptions":[{"cweId":"CWE-200","lang":"en","description":"CWE-200: Exposure of Sensitive Information to an Unauthorized Actor","type":"CWE"}]}],"metrics":[{"cvssV4_0":{"attackVector":"NETWORK","attackComplexity":"LOW","attackRequirements":"NONE","privilegesRequired":"NONE","userInteraction":"PASSIVE","vulnConfidentialityImpact":"LOW","vulnIntegrityImpact":"LOW","vulnAvailabilityImpact":"NONE","subConfidentialityImpact":"NONE","subIntegrityImpact":"NONE","subAvailabilityImpact":"NONE","baseScore":5.3,"baseSeverity":"MEDIUM","vectorString":"CVSS:4.0/AV:N/AC:L/AT:N/PR:N/UI:P/VC:L/VI:L/VA:N/SC:N/SI:N/SA:N","version":"4.0"}}],"references":[{"name":"https://github.com/vllm-project/vllm/security/advisories/GHSA-5jv2-g5wq-cmr4","tags":["x_refsource_CONFIRM"],"url":"https://github.com/vllm-project/vllm/security/advisories/GHSA-5jv2-g5wq-cmr4"},{"name":"https://github.com/vllm-project/vllm/pull/44971","tags":["x_refsource_MISC"],"url":"https://github.com/vllm-project/vllm/pull/44971"},{"name":"https://github.com/vllm-project/vllm/commit/f219788f91952827132fa4fdf916427cd20d225e","tags":["x_refsource_MISC"],"url":"https://github.com/vllm-project/vllm/commit/f219788f91952827132fa4fdf916427cd20d225e"}],"affected":[{"vendor":"vllm-project","product":"vllm","versions":[{"version":">= 0.5.5, < 0.23.1rc0","status":"affected"}]}],"providerMetadata":{"orgId":"a0819718-46f1-4df5-94e2-005712e83aaa","shortName":"GitHub_M","dateUpdated":"2026-06-22T21:55:42.001Z"},"descriptions":[{"lang":"en","value":"vLLM is an inference and serving engine for large language models (LLMs). From 0.5.5 until 0.23.1rc0, integer truncation of tensor dimensions in vLLM's GGUF dequantize kernels (csrc/quantization/gguf/gguf_kernel.cu) causes partial tensor processing. The output tensor is allocated at full size via torch::empty (uninitialized memory), but the dequantize CUDA kernel processes only a truncated number of elements. The unfilled portion of the output tensor retains whatever was previously in GPU memory. In multi-tenant inference deployments, this residual GPU memory may contain tensor data from other users' inference requests, constituting information disclosure. This vulnerability is fixed in 0.23.1rc0."}],"source":{"advisory":"GHSA-5jv2-g5wq-cmr4","discovery":"UNKNOWN"}},"adp":[{"metrics":[{"other":{"type":"ssvc","content":{"timestamp":"2026-06-23T15:04:15.555317Z","id":"CVE-2026-53923","options":[{"Exploitation":"none"},{"Automatable":"no"},{"Technical Impact":"partial"}],"role":"CISA Coordinator","version":"2.0.3"}}}],"title":"CISA ADP Vulnrichment","providerMetadata":{"orgId":"134c704f-9b21-4f2e-91b3-4a467353bcc0","shortName":"CISA-ADP","dateUpdated":"2026-06-23T15:05:21.711Z"}}]}}