QuixiAI
/

Ina-v11.1-gguf

Text Generation

Model card Files Files and versions

Ina-v11.1-gguf / quantize_all.sh

ehartford's picture

Add files using upload-large-folder tool

793fac3 verified 3 months ago

history blame contribute delete

3.44 kB

	#!/bin/bash

	# Activate the Python virtual environment that lives inside the llama.cpp checkout.
	source ~/git/llama.cpp/.venv/bin/activate

	# Path to the llama-quantize binary; invoked once per quantization type.
	QUANTIZER=~/git/llama.cpp/build/bin/llama-quantize

	# Detect the logical CPU count; it is handed to the quantizer as its thread
	# count. `sysctl -n hw.logicalcpu` only exists on macOS, so fall back to
	# getconf on other systems and to a single thread if neither works.
	THREADS=$(sysctl -n hw.logicalcpu 2>/dev/null) \
		|| THREADS=$(getconf _NPROCESSORS_ONLN 2>/dev/null) \
		|| THREADS=1
	echo "Detected $THREADS threads."

	# Find the input file: a name ending in F16.gguf or f16.gguf directly inside
	# the current directory. The pipe must be a real (unescaped) pipe, otherwise
	# "| head -n 1" is passed to find as arguments and nothing is ever found.
	# Sorting makes the choice deterministic when several files match.
	INPUT_FILE=$(find . -maxdepth 1 -name "*[Ff]16.gguf" | LC_ALL=C sort | head -n 1)

	if [ -z "$INPUT_FILE" ]; then
		echo "Error: No F16 GGUF file found in the current directory." >&2
		exit 1
	fi

	# find prints "./name"; drop the leading "./" so derived output names are clean.
	INPUT_FILE=${INPUT_FILE#./}

	echo "Found input file: $INPUT_FILE"

	# Quantization types to produce, in processing order. Each entry is passed
	# verbatim to the quantizer as its type argument. Entries are grouped by
	# name prefix purely for readability; the resulting array is one flat list.
	TYPES=()
	TYPES+=(IQ3_M IQ3_XS IQ3_XXS)
	TYPES+=(IQ4_NL IQ4_XS)
	TYPES+=(Q3_K_L Q3_K_M Q3_K_S Q3_K_XL)
	TYPES+=(Q4_0 Q4_1)
	TYPES+=(Q4_K_L Q4_K_M Q4_K_S)
	TYPES+=(Q5_K_L Q5_K_M Q5_K_S)
	TYPES+=(Q6_K Q6_K_L)
	TYPES+=(Q8_0)

	# Size above which a finished quant is split into shards:
	# 40 * 1024^3 = 42949672960 bytes. Loop-invariant, so it is set once here.
	LIMIT_BYTES=42949672960

	# Path to the llama-gguf-split binary used for oversized outputs.
	SPLIT_TOOL=~/git/llama.cpp/build/bin/llama-gguf-split

	# Print the output filename for quantizing input file $1 to type $2.
	# Only the trailing F16/f16 marker (the part right before ".gguf") is swapped
	# for the type, so an earlier "f16"/"F16" elsewhere in the name (e.g. "bf16")
	# is left alone. If the name does not end in [Ff]16.gguf, the type is
	# appended before the extension instead.
	quant_output_name() {
		local input=$1 type=$2
		case "$input" in
			*[Ff]16.gguf) printf '%s\n' "${input%[Ff]16.gguf}${type}.gguf" ;;
			*) printf '%s\n' "${input%.gguf}-${type}.gguf" ;;
		esac
	}

	# Print the size of file $1 in bytes. Tries the BSD/macOS stat syntax first,
	# then the GNU coreutils syntax. Prints nothing and returns non-zero if both fail.
	file_size_bytes() {
		stat -f%z -- "$1" 2>/dev/null || stat -c%s -- "$1" 2>/dev/null
	}

	# Split file $1 (relative to the current directory) into directory $2 using
	# $SPLIT_TOOL. Returns the split tool's exit status, or non-zero if the
	# directory cannot be created or entered.
	# NOTE(review): the size threshold above is 40 * 1024^3 bytes while the tool is
	# given "40G"; confirm the tool's unit if exact parity between the two matters.
	split_into_dir() {
		local file=$1 dir=$2
		mkdir -p -- "$dir" || return

		echo " Splitting '$file' into '$dir/'..."

		# Run inside a subshell so the working directory change cannot leak out,
		# and so a failed cd aborts the split instead of running it in the wrong place.
		(
			cd -- "$dir" || exit
			# Flags first, then the input file, then the output prefix.
			"$SPLIT_TOOL" --split-max-size 40G "../$file" "$(basename "$file" .gguf)"
		)
	}

	echo "Starting batch quantization..."
	echo "----------------------------------------"

	for TYPE in "${TYPES[@]}"; do
		OUTPUT_FILE=$(quant_output_name "$INPUT_FILE" "$TYPE")

		echo "Quantizing to $TYPE..."
		"$QUANTIZER" "$INPUT_FILE" "$OUTPUT_FILE" "$TYPE" "$THREADS"

		EXIT_CODE=$?
		if [ "$EXIT_CODE" -eq 0 ]; then
			echo "✅ Successfully created $OUTPUT_FILE"

			# Outputs larger than LIMIT_BYTES are split into a directory named
			# after the file (without its .gguf extension).
			FILE_SIZE=$(file_size_bytes "$OUTPUT_FILE")

			if [ -z "$FILE_SIZE" ]; then
				echo "Warning: could not determine size of $OUTPUT_FILE; skipping split check." >&2
			elif [ "$FILE_SIZE" -gt "$LIMIT_BYTES" ]; then
				echo "File size ($FILE_SIZE bytes) exceeds 40GB. Splitting into directory..."

				DIR_NAME="${OUTPUT_FILE%.gguf}"

				if split_into_dir "$OUTPUT_FILE" "$DIR_NAME"; then
					echo "✅ Split successful. Removing original large file."
					rm -- "$OUTPUT_FILE"
				else
					echo "❌ Splitting failed. Keeping original file."
				fi
			fi

		else
			# A failure does not stop the batch; the next type is still attempted.
			echo "❌ Failed to create $OUTPUT_FILE (Error code: $EXIT_CODE)"
			echo " (Note: '$TYPE' might not be a valid quantization type in this version of llama.cpp)"
		fi
		echo "----------------------------------------"
	done

	echo "Batch quantization complete."