#!/bin/bash
#
# Batch-quantize an F16 GGUF model with llama.cpp's llama-quantize.
#
# Usage: run with no arguments from the directory that contains the model.
#
# Behavior:
#   1. Picks the first file in the current directory whose name ends in
#      "F16.gguf" or "f16.gguf".
#   2. For every entry in TYPES, runs the quantizer and writes the result next
#      to the input, with the trailing F16/f16 tag replaced by the type name.
#   3. Any output larger than LIMIT_BYTES is split into a directory named
#      after the output file (without ".gguf"); the unsplit file is removed
#      only if the split succeeds.
#
# A failure for one type is reported and the loop moves on to the next type,
# which is why `set -e` is deliberately not enabled here.
#
# Platform note: `sysctl -n hw.logicalcpu` and `stat -f%z` are the BSD/macOS
# forms of these tools; the script is kept compatible with bash 3.2.

source ~/git/llama.cpp/.venv/bin/activate

QUANTIZER=~/git/llama.cpp/build/bin/llama-quantize
SPLIT_TOOL=~/git/llama.cpp/build/bin/llama-gguf-split
# 40 * 1024^3 bytes.
# NOTE(review): the split tool below is asked for "40G" pieces; confirm that
# both values use the unit you intend.
LIMIT_BYTES=42949672960
readonly QUANTIZER SPLIT_TOOL LIMIT_BYTES

TYPES=(
  "IQ3_M"
  "IQ3_XS"
  "IQ3_XXS"
  "IQ4_NL"
  "IQ4_XS"
  "Q3_K_L"
  "Q3_K_M"
  "Q3_K_S"
  "Q3_K_XL"
  "Q4_0"
  "Q4_1"
  "Q4_K_L"
  "Q4_K_M"
  "Q4_K_S"
  "Q5_K_L"
  "Q5_K_M"
  "Q5_K_S"
  "Q6_K"
  "Q6_K_L"
  "Q8_0"
)

#######################################
# Derive the output filename for one quantization type.
# Only the trailing "F16.gguf"/"f16.gguf" tag is replaced, so an earlier
# "F16"/"f16" elsewhere in the model name is left untouched.
# Arguments: $1 - input filename, $2 - quantization type
# Outputs:   the output filename on stdout
#######################################
output_name_for() {
  local input=$1
  local type=$2
  local stem=${input%[Ff]16.gguf}
  local output=$input

  if [ "$stem" != "$input" ]; then
    output="${stem}${type}.gguf"
  fi

  # Never hand the quantizer an output path equal to its input: fall back to
  # appending the type before the extension.
  if [ "$output" == "$input" ]; then
    output="${input%.gguf}-${type}.gguf"
  fi

  printf '%s\n' "$output"
}

#######################################
# Split a quantized file into a sibling directory when it exceeds LIMIT_BYTES.
# Globals:   LIMIT_BYTES, SPLIT_TOOL (read)
# Arguments: $1 - quantized file, relative to the current directory
# Returns:   0 if no split was needed or the split succeeded, 1 otherwise
#######################################
split_if_oversized() {
  local file=$1
  local size dir prefix

  if ! size=$(stat -f%z "$file") || [ -z "$size" ]; then
    echo "❌ Could not determine the size of $file; skipping split check." >&2
    return 1
  fi

  if [ "$size" -le "$LIMIT_BYTES" ]; then
    return 0
  fi

  echo "File size ($size bytes) exceeds 40GB. Splitting into directory..."

  dir="${file%.gguf}"
  prefix=$(basename "$file" .gguf)

  if ! mkdir -p "$dir"; then
    echo "❌ Splitting failed. Keeping original file." >&2
    return 1
  fi

  echo " Splitting '$file' into '$dir/'..."

  # Run the tool from inside the target directory so the pieces land there.
  # The subshell keeps the caller's working directory untouched even when
  # `cd` or the tool fails; "../$file" works because $file has no directory
  # component of its own.
  if (cd "$dir" && "$SPLIT_TOOL" --split-max-size 40G "../$file" "$prefix"); then
    echo "✅ Split successful. Removing original large file."
    rm -- "$file"
  else
    echo "❌ Splitting failed. Keeping original file." >&2
    return 1
  fi
}

#######################################
# Entry point: locate the input model and quantize it to every type in TYPES.
# Globals:   QUANTIZER, TYPES (read)
# Returns:   exits 1 if the quantizer or the input model is missing
#######################################
main() {
  local threads input_file type output_file exit_code
  local -a cmd

  # Without the binary every type would "fail" with a misleading hint about
  # invalid quantization types, so stop early instead.
  if [ ! -x "$QUANTIZER" ]; then
    echo "Error: llama-quantize not found or not executable: $QUANTIZER" >&2
    exit 1
  fi

  threads=$(sysctl -n hw.logicalcpu)
  if [ -n "$threads" ]; then
    echo "Detected $threads threads."
  else
    echo "Warning: could not detect the thread count; using the quantizer's default." >&2
  fi

  input_file=$(find . -maxdepth 1 -name "*[Ff]16.gguf" | head -n 1)

  if [ -z "$input_file" ]; then
    echo "Error: No F16 GGUF file found in the current directory." >&2
    exit 1
  fi

  # Drop the "./" that find prepends.
  input_file=${input_file#./}

  echo "Found input file: $input_file"

  echo "Starting batch quantization..."
  echo "----------------------------------------"

  for type in "${TYPES[@]}"; do
    output_file=$(output_name_for "$input_file" "$type")

    echo "Quantizing to $type..."

    # Pass the thread count only when it is known, rather than an empty
    # positional argument.
    cmd=("$QUANTIZER" "$input_file" "$output_file" "$type")
    if [ -n "$threads" ]; then
      cmd+=("$threads")
    fi

    "${cmd[@]}"
    exit_code=$?

    if [ "$exit_code" -eq 0 ]; then
      echo "✅ Successfully created $output_file"
      split_if_oversized "$output_file"
    else
      echo "❌ Failed to create $output_file (Error code: $exit_code)" >&2
      echo " (Note: '$type' might not be a valid quantization type in this version of llama.cpp)" >&2
    fi
    echo "----------------------------------------"
  done

  echo "Batch quantization complete."
}

main "$@"