{"api_version":"1","generated_at":"2026-06-23T03:08:15+00:00","cve":"CVE-2026-53923","urls":{"html":"https://cve.report/CVE-2026-53923","api":"https://cve.report/api/cve/CVE-2026-53923.json","docs":"https://cve.report/api","cve_org":"https://www.cve.org/CVERecord?id=CVE-2026-53923","nvd":"https://nvd.nist.gov/vuln/detail/CVE-2026-53923"},"summary":{"title":"vLLM GGUF Kernels: int64_t to int truncation of tensor dimensions causes GPU buffer overflow","description":"vLLM is an inference and serving engine for large language models (LLMs). From 0.5.5 until 0.23.1rc0, integer truncation of tensor dimensions in vLLM's GGUF dequantize kernels (csrc/quantization/gguf/gguf_kernel.cu) causes partial tensor processing. The output tensor is allocated at full size via torch::empty (uninitialized memory), but the dequantize CUDA kernel processes only a truncated number of elements. The unfilled portion of the output tensor retains whatever was previously in GPU memory. In multi-tenant inference deployments, this residual GPU memory may contain tensor data from other users' inference requests, constituting information disclosure. This vulnerability is fixed in 0.23.1rc0.","state":"PUBLISHED","assigner":"GitHub_M","published_at":"2026-06-22 23:16:30","updated_at":"2026-06-22 23:16:30"},"problem_types":["CWE-200","CWE-681","CWE-681 CWE-681: Incorrect Conversion between Numeric Types","CWE-200 CWE-200: Exposure of Sensitive Information to an Unauthorized Actor"],"metrics":[{"version":"4.0","source":"security-advisories@github.com","type":"Secondary","score":"5.3","severity":"MEDIUM","vector":"CVSS:4.0/AV:N/AC:L/AT:N/PR:N/UI:P/VC:L/VI:L/VA:N/SC:N/SI:N/SA:N/E:X/CR:X/IR:X/AR:X/MAV:X/MAC:X/MAT:X/MPR:X/MUI:X/MVC:X/MVI:X/MVA:X/MSC:X/MSI:X/MSA:X/S:X/AU:X/R:X/V:X/RE:X/U:X","data":{"version":"4.0","vectorString":"CVSS:4.0/AV:N/AC:L/AT:N/PR:N/UI:P/VC:L/VI:L/VA:N/SC:N/SI:N/SA:N/E:X/CR:X/IR:X/AR:X/MAV:X/MAC:X/MAT:X/MPR:X/MUI:X/MVC:X/MVI:X/MVA:X/MSC:X/MSI:X/MSA:X/S:X/AU:X/R:X/V:X/RE:X/U:X","baseScore":5.3,"baseSeverity":"MEDIUM","attackVector":"NETWORK","attackComplexity":"LOW","attackRequirements":"NONE","privilegesRequired":"NONE","userInteraction":"PASSIVE","vulnConfidentialityImpact":"LOW","vulnIntegrityImpact":"LOW","vulnAvailabilityImpact":"NONE","subConfidentialityImpact":"NONE","subIntegrityImpact":"NONE","subAvailabilityImpact":"NONE","exploitMaturity":"NOT_DEFINED","confidentialityRequirement":"NOT_DEFINED","integrityRequirement":"NOT_DEFINED","availabilityRequirement":"NOT_DEFINED","modifiedAttackVector":"NOT_DEFINED","modifiedAttackComplexity":"NOT_DEFINED","modifiedAttackRequirements":"NOT_DEFINED","modifiedPrivilegesRequired":"NOT_DEFINED","modifiedUserInteraction":"NOT_DEFINED","modifiedVulnConfidentialityImpact":"NOT_DEFINED","modifiedVulnIntegrityImpact":"NOT_DEFINED","modifiedVulnAvailabilityImpact":"NOT_DEFINED","modifiedSubConfidentialityImpact":"NOT_DEFINED","modifiedSubIntegrityImpact":"NOT_DEFINED","modifiedSubAvailabilityImpact":"NOT_DEFINED","Safety":"NOT_DEFINED","Automatable":"NOT_DEFINED","Recovery":"NOT_DEFINED","valueDensity":"NOT_DEFINED","vulnerabilityResponseEffort":"NOT_DEFINED","providerUrgency":"NOT_DEFINED"}},{"version":"4.0","source":"CNA","type":"DECLARED","score":"5.3","severity":"MEDIUM","vector":"CVSS:4.0/AV:N/AC:L/AT:N/PR:N/UI:P/VC:L/VI:L/VA:N/SC:N/SI:N/SA:N","data":{"attackComplexity":"LOW","attackRequirements":"NONE","attackVector":"NETWORK","baseScore":5.3,"baseSeverity":"MEDIUM","privilegesRequired":"NONE","subAvailabilityImpact":"NONE","subConfidentialityImpact":"NONE","subIntegrityImpact":"NONE","userInteraction":"PASSIVE","vectorString":"CVSS:4.0/AV:N/AC:L/AT:N/PR:N/UI:P/VC:L/VI:L/VA:N/SC:N/SI:N/SA:N","version":"4.0","vulnAvailabilityImpact":"NONE","vulnConfidentialityImpact":"LOW","vulnIntegrityImpact":"LOW"}}],"references":[{"url":"https://github.com/vllm-project/vllm/commit/f219788f91952827132fa4fdf916427cd20d225e","name":"https://github.com/vllm-project/vllm/commit/f219788f91952827132fa4fdf916427cd20d225e","refsource":"security-advisories@github.com","tags":[],"title":"","mime":"","httpstatus":"","archivestatus":"0"},{"url":"https://github.com/vllm-project/vllm/security/advisories/GHSA-5jv2-g5wq-cmr4","name":"https://github.com/vllm-project/vllm/security/advisories/GHSA-5jv2-g5wq-cmr4","refsource":"security-advisories@github.com","tags":[],"title":"","mime":"","httpstatus":"","archivestatus":"0"},{"url":"https://github.com/vllm-project/vllm/pull/44971","name":"https://github.com/vllm-project/vllm/pull/44971","refsource":"security-advisories@github.com","tags":[],"title":"","mime":"","httpstatus":"","archivestatus":"0"},{"url":"https://www.cve.org/CVERecord?id=CVE-2026-53923","name":"CVE Program record","refsource":"CVE.ORG","tags":["canonical"]},{"url":"https://nvd.nist.gov/vuln/detail/CVE-2026-53923","name":"NVD vulnerability detail","refsource":"NVD","tags":["canonical","analysis"]}],"affected":[{"source":"CNA","vendor":"vllm-project","product":"vllm","version":"affected >= 0.5.5, < 0.23.1rc0","platforms":[]}],"timeline":[],"solutions":[],"workarounds":[],"exploits":[],"credits":[],"nvd_cpes":[],"vendor_comments":[],"enrichments":{"kev":null,"epss":null,"legacy_qids":[]},"source_records":{"cve_program":{"containers":{"cna":{"affected":[{"product":"vllm","vendor":"vllm-project","versions":[{"status":"affected","version":">= 0.5.5, < 0.23.1rc0"}]}],"descriptions":[{"lang":"en","value":"vLLM is an inference and serving engine for large language models (LLMs). From 0.5.5 until 0.23.1rc0, integer truncation of tensor dimensions in vLLM's GGUF dequantize kernels (csrc/quantization/gguf/gguf_kernel.cu) causes partial tensor processing. The output tensor is allocated at full size via torch::empty (uninitialized memory), but the dequantize CUDA kernel processes only a truncated number of elements. The unfilled portion of the output tensor retains whatever was previously in GPU memory. In multi-tenant inference deployments, this residual GPU memory may contain tensor data from other users' inference requests, constituting information disclosure. This vulnerability is fixed in 0.23.1rc0."}],"metrics":[{"cvssV4_0":{"attackComplexity":"LOW","attackRequirements":"NONE","attackVector":"NETWORK","baseScore":5.3,"baseSeverity":"MEDIUM","privilegesRequired":"NONE","subAvailabilityImpact":"NONE","subConfidentialityImpact":"NONE","subIntegrityImpact":"NONE","userInteraction":"PASSIVE","vectorString":"CVSS:4.0/AV:N/AC:L/AT:N/PR:N/UI:P/VC:L/VI:L/VA:N/SC:N/SI:N/SA:N","version":"4.0","vulnAvailabilityImpact":"NONE","vulnConfidentialityImpact":"LOW","vulnIntegrityImpact":"LOW"}}],"problemTypes":[{"descriptions":[{"cweId":"CWE-681","description":"CWE-681: Incorrect Conversion between Numeric Types","lang":"en","type":"CWE"}]},{"descriptions":[{"cweId":"CWE-200","description":"CWE-200: Exposure of Sensitive Information to an Unauthorized Actor","lang":"en","type":"CWE"}]}],"providerMetadata":{"dateUpdated":"2026-06-22T21:55:42.001Z","orgId":"a0819718-46f1-4df5-94e2-005712e83aaa","shortName":"GitHub_M"},"references":[{"name":"https://github.com/vllm-project/vllm/security/advisories/GHSA-5jv2-g5wq-cmr4","tags":["x_refsource_CONFIRM"],"url":"https://github.com/vllm-project/vllm/security/advisories/GHSA-5jv2-g5wq-cmr4"},{"name":"https://github.com/vllm-project/vllm/pull/44971","tags":["x_refsource_MISC"],"url":"https://github.com/vllm-project/vllm/pull/44971"},{"name":"https://github.com/vllm-project/vllm/commit/f219788f91952827132fa4fdf916427cd20d225e","tags":["x_refsource_MISC"],"url":"https://github.com/vllm-project/vllm/commit/f219788f91952827132fa4fdf916427cd20d225e"}],"source":{"advisory":"GHSA-5jv2-g5wq-cmr4","discovery":"UNKNOWN"},"title":"vLLM GGUF Kernels: int64_t to int truncation of tensor dimensions causes GPU buffer overflow"}},"cveMetadata":{"assignerOrgId":"a0819718-46f1-4df5-94e2-005712e83aaa","assignerShortName":"GitHub_M","cveId":"CVE-2026-53923","datePublished":"2026-06-22T21:55:42.001Z","dateReserved":"2026-06-11T15:46:12.316Z","dateUpdated":"2026-06-22T21:55:42.001Z","state":"PUBLISHED"},"dataType":"CVE_RECORD","dataVersion":"5.2"},"nvd":{"publishedDate":"2026-06-22 23:16:30","lastModifiedDate":"2026-06-22 23:16:30","problem_types":["CWE-200","CWE-681","CWE-681 CWE-681: Incorrect Conversion between Numeric Types","CWE-200 CWE-200: Exposure of Sensitive Information to an Unauthorized Actor"],"metrics":{"cvssMetricV40":[{"source":"security-advisories@github.com","type":"Secondary","cvssData":{"version":"4.0","vectorString":"CVSS:4.0/AV:N/AC:L/AT:N/PR:N/UI:P/VC:L/VI:L/VA:N/SC:N/SI:N/SA:N/E:X/CR:X/IR:X/AR:X/MAV:X/MAC:X/MAT:X/MPR:X/MUI:X/MVC:X/MVI:X/MVA:X/MSC:X/MSI:X/MSA:X/S:X/AU:X/R:X/V:X/RE:X/U:X","baseScore":5.3,"baseSeverity":"MEDIUM","attackVector":"NETWORK","attackComplexity":"LOW","attackRequirements":"NONE","privilegesRequired":"NONE","userInteraction":"PASSIVE","vulnConfidentialityImpact":"LOW","vulnIntegrityImpact":"LOW","vulnAvailabilityImpact":"NONE","subConfidentialityImpact":"NONE","subIntegrityImpact":"NONE","subAvailabilityImpact":"NONE","exploitMaturity":"NOT_DEFINED","confidentialityRequirement":"NOT_DEFINED","integrityRequirement":"NOT_DEFINED","availabilityRequirement":"NOT_DEFINED","modifiedAttackVector":"NOT_DEFINED","modifiedAttackComplexity":"NOT_DEFINED","modifiedAttackRequirements":"NOT_DEFINED","modifiedPrivilegesRequired":"NOT_DEFINED","modifiedUserInteraction":"NOT_DEFINED","modifiedVulnConfidentialityImpact":"NOT_DEFINED","modifiedVulnIntegrityImpact":"NOT_DEFINED","modifiedVulnAvailabilityImpact":"NOT_DEFINED","modifiedSubConfidentialityImpact":"NOT_DEFINED","modifiedSubIntegrityImpact":"NOT_DEFINED","modifiedSubAvailabilityImpact":"NOT_DEFINED","Safety":"NOT_DEFINED","Automatable":"NOT_DEFINED","Recovery":"NOT_DEFINED","valueDensity":"NOT_DEFINED","vulnerabilityResponseEffort":"NOT_DEFINED","providerUrgency":"NOT_DEFINED"}}]},"configurations":[]},"legacy_mitre":{"record":{"CveYear":"2026","CveId":"53923","Ordinal":"1","Title":"vLLM GGUF Kernels: int64_t to int truncation of tensor dimension","CVE":"CVE-2026-53923","Year":"2026"},"notes":[{"CveYear":"2026","CveId":"53923","Ordinal":"1","NoteData":"vLLM is an inference and serving engine for large language models (LLMs). From 0.5.5 until 0.23.1rc0, integer truncation of tensor dimensions in vLLM's GGUF dequantize kernels (csrc/quantization/gguf/gguf_kernel.cu) causes partial tensor processing. The output tensor is allocated at full size via torch::empty (uninitialized memory), but the dequantize CUDA kernel processes only a truncated number of elements. The unfilled portion of the output tensor retains whatever was previously in GPU memory. In multi-tenant inference deployments, this residual GPU memory may contain tensor data from other users' inference requests, constituting information disclosure. This vulnerability is fixed in 0.23.1rc0.","Type":"Description","Title":"vLLM GGUF Kernels: int64_t to int truncation of tensor dimension"}]}}}