Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Here are BitsFusion Script Quantisation without finetuned #5

s9anus98a opened this issue Jun 18, 2024 · 3 comments

Here are BitsFusion Script Quantisation without finetuned #5

s9anus98a opened this issue Jun 18, 2024 · 3 comments


Copy link

import torch
import copy
import numpy as np
from transformers import CLIPProcessor, CLIPModel
from diffusers import StableDiffusionPipeline
from scipy.cluster.vq import vq, kmeans2

# Configuration
# Checkpoint handed to the Stable Diffusion pipeline loader further down.
model_path = "path/to/your/sd-v1-5-fp16.safetensors"
# Destination for the quantized model.
output_path = "path/to/your/quantized_model.safetensors"
bits = [1, 2, 3, 4]  # Bit-widths to be tested
# Cut-off compared against mse / parameters ** size_factor when a bit-width is chosen.
sensitivity_threshold = 0.05
# Exponent applied to the layer's parameter count in that sensitivity score.
size_factor = 0.5
# Quantile levels (passed to np.quantile) used for the CLIP-score adjustment.
clip_thresholds = [0.9, 0.95, 0.98]
# Number of timesteps (0 .. time_steps - 1) whose embeddings are precomputed.
time_steps = 50

# Fungsi untuk menghitung Mean Squared Error (MSE)
def calculate_mse(image1, image2):
  """Return the mean squared error between two images.

  Integer inputs are promoted to floating point before subtracting.
  Without that, unsigned 8-bit pixel data wraps around (0 - 200 becomes 56)
  and integer torch tensors cannot be averaged at all.

  Args:
    image1: First image, as a NumPy array (or array-like) or a torch tensor.
    image2: Second image, same shape and container type as ``image1``.

  Returns:
    The mean of the element-wise squared difference (a NumPy or torch scalar).
  """
  def _as_float(image):
    # Torch tensors are recognised by duck typing so they stay torch tensors.
    if hasattr(image, "is_floating_point"):
      return image if image.is_floating_point() else image.float()
    array = np.asarray(image)
    if np.issubdtype(array.dtype, np.inexact):
      return array
    return array.astype(np.float64)

  return ((_as_float(image1) - _as_float(image2)) ** 2).mean()

# Fungsi untuk menghitung CLIP score
def calculate_clip_score(images, texts, clip_processor, clip_model):
  """Return the mean CLIP logit between each image and its own prompt.

  Args:
    images: Generated images, one per entry of ``texts``.
    texts: Prompts, paired with ``images`` by position.
    clip_processor: Callable that turns ``text``/``images`` into model inputs.
    clip_model: Callable whose output exposes ``logits_per_image``.

  Returns:
    The average of the diagonal of ``logits_per_image`` as a Python float,
    i.e. only the (image i, text i) pairs contribute.
  """
  inputs = clip_processor(text=texts, images=images, return_tensors="pt", padding=True)
  # Scoring is inference only and the result is reduced to a plain float,
  # so there is no reason to record an autograd graph for the forward pass.
  with torch.no_grad():
    outputs = clip_model(**inputs)
  return outputs.logits_per_image.diag().mean().item()

# Fungsi untuk menganalisis sensitivitas layer
def analyze_layer_sensitivity(model, prompts, bits, sample_size=100):
  """Measure how much quantizing each weight tensor on its own hurts the output.

  For every parameter whose name contains ``"weight"`` and every bit-width in
  ``bits``, a copy of the model is quantized at that single layer, images are
  generated, and they are compared against the unquantized model's images.

  Args:
    model: The model to analyze.
    prompts: Text prompts used for generation and CLIP scoring.
    bits: Iterable of bit-widths to try.
    sample_size: Forwarded to ``generate_images``.

  Returns:
    ``{parameter_name: {bit_width: {"mse", "clip_score", "parameters"}}}``.
    Each layer also carries an entry under key ``32`` describing the
    unquantized reference, which ``determine_mixed_precision`` reads.
  """
  full_precision = 32

  clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
  clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")

  # The reference does not depend on the layer or bit-width under test, so it
  # is produced once instead of once per (layer, bit-width) pair.
  reference_images = generate_images(model, prompts, sample_size)
  reference_clip = calculate_clip_score(reference_images, prompts, clip_processor, clip_model)

  results = {}
  for name, param in model.named_parameters():
    if "weight" not in name:
      continue
    layer_results = {
        full_precision: {
            "mse": 0.0,
            "clip_score": reference_clip,
            "parameters": param.numel(),
        },
    }
    for b in bits:
      # quantize_layer modifies the model it is given, so use a throwaway copy.
      quantized_model = copy.deepcopy(model)
      quantize_layer(quantized_model, name, b)
      generated_images = generate_images(quantized_model, prompts, sample_size)
      layer_results[b] = {
          "mse": calculate_mse(generated_images, reference_images),
          "clip_score": calculate_clip_score(generated_images, prompts, clip_processor, clip_model),
          "parameters": param.numel(),
      }
    results[name] = layer_results
  return results

# Fungsi untuk mengkuantisasi layer dengan Lloyd-Max dari SciPy
def quantize_layer(model, layer_name, bits):
  """Quantize one layer's weights in place with a k-means (Lloyd-Max style) codebook.

  Every weight is replaced by the nearest of at most ``2**bits`` centroids
  found by SciPy's ``kmeans2``. The result is written back in the weight's
  original dtype and on its original device, so the layer keeps its regular
  ``forward`` and the model stays directly usable for inference. Centroids
  are initialised from randomly chosen weights, so results vary between runs.

  Args:
    model: PyTorch module that contains the layer.
    layer_name: Name of the layer as listed by ``model.named_modules()``, or
      the name of its weight parameter as listed by
      ``model.named_parameters()`` (for example ``"conv_in.weight"``).
    bits: Codebook size in bits; must be at least 1.

  Raises:
    KeyError: If no module matches ``layer_name``.
    ValueError: If ``bits`` is smaller than 1.

  Layers without a floating-point ``weight`` are reported and left untouched.
  ``quantization_scale`` and ``quantization_zero_point`` are stored on the
  layer as metadata describing a uniform grid over the original weight range.
  They are not applied in ``forward``: the stored weights are already the
  reconstructed values.
  """
  if bits < 1:
    raise ValueError(f"bits must be >= 1, got {bits}")

  # Callers iterate named_parameters() ("block.conv.weight"), while modules
  # are keyed without the parameter suffix ("block.conv"); accept both.
  modules = dict(model.named_modules())
  module_name = layer_name
  if module_name not in modules:
    owner, _, leaf = layer_name.rpartition(".")
    if leaf == "weight":
      module_name = owner
  layer = modules[module_name]

  weight = getattr(layer, "weight", None)
  if weight is None:
    print(f"Layer {layer_name} tidak memiliki bobot.")
    return

  if not weight.is_floating_point():
    print(f"Layer {layer_name} sudah memiliki bobot integer.")
    return

  if weight.numel() == 0:
    return

  # float32 on the CPU: NumPy has no bfloat16, and half precision is a poor
  # working type for the clustering.
  values = weight.detach().float().cpu().numpy()
  samples = values.reshape(-1, 1)

  # 'points' initialisation draws distinct samples, so the number of clusters
  # cannot exceed the number of weights.
  clusters = min(2 ** bits, samples.shape[0])
  centroids, labels = kmeans2(samples, clusters, minit='points')
  quantized_w = centroids[labels].reshape(values.shape)

  with torch.no_grad():
    weight.copy_(torch.from_numpy(quantized_w).to(device=weight.device, dtype=weight.dtype))

  min_val = float(values.min())
  max_val = float(values.max())
  # A constant tensor has zero range; fall back to a unit scale so the zero
  # point stays finite.
  scale = (max_val - min_val) / (2 ** bits - 1) if max_val > min_val else 1.0
  layer.quantization_scale = torch.tensor(scale, device=weight.device)
  layer.quantization_zero_point = torch.tensor(float(round(-min_val / scale)), device=weight.device)

# Fungsi untuk menghasilkan gambar
def generate_images(model, prompts, sample_size):
  """Generate images with the Stable Diffusion model (not implemented yet).

  Args:
    model: The model to generate with.
    prompts: Text prompts to render.
    sample_size: Requested number of samples.

  Raises:
    NotImplementedError: Always. The generation step still has to be written
      with the diffusers pipeline, using ``model`` and ``prompts`` as input.
      Failing loudly here is preferable to returning ``None`` and letting the
      metric functions fail later with an unrelated error.
  """
  raise NotImplementedError(
      "generate_images is a placeholder: implement image generation with the "
      "diffusers pipeline before running the sensitivity analysis."
  )

# Fungsi untuk menentukan strategi mixed-precision
def determine_mixed_precision(results, sensitivity_threshold, size_factor, clip_thresholds):
  """Choose a bit-width for every analyzed layer.

  Args:
    results: ``{layer_name: {bit_width: {"mse", "clip_score", "parameters"}}}``.
      The entry under key ``32``, when present, is the unquantized reference
      and is never a candidate bit-width.
    sensitivity_threshold: Upper bound for an acceptable sensitivity score.
    size_factor: Exponent applied to the parameter count in the score
      ``mse / parameters ** size_factor``.
    clip_thresholds: Quantile levels used for the CLIP-score adjustment.

  Returns:
    ``{layer_name: bit_width}``. Layers without any candidate bit-width are
    omitted.

  The candidate bit-widths are taken from ``results`` itself rather than from
  a module-level variable. When no reference entry exists for a layer, the
  CLIP-score adjustment is skipped for it instead of raising ``KeyError``.
  """
  full_precision = 32

  # The reference scores are the same for every layer being processed, so the
  # quantile cut-offs are computed once up front.
  reference_scores = [
      layer[full_precision]["clip_score"]
      for layer in results.values()
      if full_precision in layer
  ]
  cutoffs = (
      [np.quantile(reference_scores, level) for level in clip_thresholds]
      if reference_scores
      else []
  )

  mixed_precision = {}
  for name, layer_results in results.items():
    candidate_bits = sorted(b for b in layer_results if b != full_precision)
    if not candidate_bits:
      continue
    widest = candidate_bits[-1]

    sensitivity_scores = {
        b: layer_results[b]["mse"] / (layer_results[b]["parameters"] ** size_factor)
        for b in candidate_bits
    }
    optimal_bits = min(candidate_bits, key=lambda b: sensitivity_scores[b])
    if sensitivity_scores[optimal_bits] > sensitivity_threshold:
      optimal_bits = widest  # Use the maximum bit-width if the threshold is exceeded

    if full_precision in layer_results:
      # NOTE(review): kept exactly as written. The "drop" is quantized minus
      # reference and is compared against quantiles of the absolute reference
      # scores; confirm that this is the intended criterion.
      clip_score_drop = (
          layer_results[widest]["clip_score"]
          - layer_results[full_precision]["clip_score"]
      )
      for i, cutoff in enumerate(cutoffs):
        if clip_score_drop > cutoff:
          optimal_bits += i + 1

    mixed_precision[name] = optimal_bits
  return mixed_precision

# Fungsi untuk mengkuantisasi model
def quantize_model(model, mixed_precision):
  """Quantize the planned weights of ``model`` and return the same model.

  Args:
    model: Module whose parameters are walked via ``named_parameters()``.
    mixed_precision: Mapping from parameter name to the bit-width to use.

  Returns:
    ``model`` itself, after ``quantize_layer`` has been called for every
    parameter whose name contains ``"weight"`` and appears in the mapping.
  """
  for param_name, _ in model.named_parameters():
    if "weight" not in param_name or param_name not in mixed_precision:
      continue
    quantize_layer(model, param_name, mixed_precision[param_name])
  return model

# Fungsi untuk pre-komputasi dan caching time embedding
def precompute_time_embeddings(model, time_steps):
  """Compute and cache the time embedding for timesteps ``0 .. time_steps - 1``.

  Args:
    model: Object exposing ``time_embedding`` and, optionally, ``time_proj``.
    time_steps: Number of consecutive timesteps to precompute.

  Returns:
    ``{t: embedding}`` for every ``t`` in ``range(time_steps)``.

  When the model has a ``time_proj`` (as diffusers' ``UNet2DConditionModel``
  does), the timestep is projected first and cast to the embedding's dtype,
  because ``time_embedding`` consumes the projected features rather than the
  raw integer timestep. Models without ``time_proj`` are called as before.
  """
  embed = model.time_embedding
  project = getattr(model, "time_proj", None)
  # Borrow device and dtype from the embedding's own parameters when it has any.
  reference = next(iter(embed.parameters()), None) if hasattr(embed, "parameters") else None

  time_embeddings = {}
  # The cache holds constants for inference; no autograd graph is needed.
  with torch.no_grad():
    for t in range(time_steps):
      timestep = torch.tensor([t])
      if reference is not None:
        timestep = timestep.to(reference.device)
      if project is not None:
        timestep = project(timestep)
        if reference is not None:
          timestep = timestep.to(reference.dtype)
      time_embeddings[t] = embed(timestep)
  return time_embeddings

# Memuat model Stable Diffusion
# `from_pretrained` expects a model directory or hub id; a single checkpoint
# file (as in the default `model_path`) is loaded with `from_single_file`.
if model_path.endswith((".safetensors", ".ckpt")):
  pipe = StableDiffusionPipeline.from_single_file(model_path, torch_dtype=torch.float16)
else:
  pipe = StableDiffusionPipeline.from_pretrained(model_path, torch_dtype=torch.float16)
model = pipe.unet

# Layer sensitivity analysis (implement generate_images first)
prompts = ["A photo of a cat"]  # Replace with the prompts you want to use
results = analyze_layer_sensitivity(model, prompts, bits)

# Determine the mixed-precision strategy
mixed_precision = determine_mixed_precision(
    results, sensitivity_threshold, size_factor, clip_thresholds
)

# Apply quantization to the model
quantized_model = quantize_model(model, mixed_precision)

# Precompute and cache the time embeddings
time_embeddings = precompute_time_embeddings(quantized_model, time_steps)

# Save the quantized model.
# NOTE(review): this statement was garbled in the source and is reconstructed.
# torch.save writes a pickle-based checkpoint regardless of the file extension
# in `output_path`; confirm the intended output format.
torch.save(quantized_model.state_dict(), output_path)
Copy link

Hi, did you implement this?

Copy link

charlesrwest commented Sep 12, 2024

Here's the same code with the comments translated (attached).

Copy link


import torch
import copy
import numpy as np
from transformers import CLIPProcessor, CLIPModel
from diffusers import StableDiffusionPipeline
from scipy.cluster.vq import kmeans2

# Configuration
# Checkpoint handed to the Stable Diffusion pipeline loader further down.
model_path = "path/to/your/sd-v1-5-fp16.safetensors"
# Destination for the quantized model.
output_path = "path/to/your/quantized_model.safetensors"
bits = [1, 2, 3, 4]  # Bit-widths to be tested
# Cut-off compared against mse / parameters ** size_factor when a bit-width is chosen.
sensitivity_threshold = 0.05
# Exponent applied to the layer's parameter count in that sensitivity score.
size_factor = 0.5
# Quantile levels (passed to np.quantile) used for the CLIP-score adjustment.
clip_thresholds = [0.9, 0.95, 0.98]
# Number of timesteps (0 .. time_steps - 1) whose embeddings are precomputed.
time_steps = 50

# Function to calculate Mean Squared Error (MSE)
def calculate_mse(image1, image2):
    """Return the mean squared error between two images.

    Integer inputs are promoted to floating point before subtracting.
    Without that, unsigned 8-bit pixel data wraps around (0 - 200 becomes 56)
    and integer torch tensors cannot be averaged at all.

    Args:
        image1: First image, as a NumPy array (or array-like) or a torch tensor.
        image2: Second image, same shape and container type as ``image1``.

    Returns:
        The mean of the element-wise squared difference (a NumPy or torch
        scalar).
    """
    def _as_float(image):
        # Torch tensors are recognised by duck typing so they stay torch tensors.
        if hasattr(image, "is_floating_point"):
            return image if image.is_floating_point() else image.float()
        array = np.asarray(image)
        if np.issubdtype(array.dtype, np.inexact):
            return array
        return array.astype(np.float64)

    return ((_as_float(image1) - _as_float(image2)) ** 2).mean()

# Function to calculate CLIP score
def calculate_clip_score(images, texts, clip_processor, clip_model):
    """Return the mean CLIP logit between each image and its own prompt.

    Args:
        images: Generated images, one per entry of ``texts``.
        texts: Prompts, paired with ``images`` by position.
        clip_processor: Callable that turns ``text``/``images`` into model inputs.
        clip_model: Callable whose output exposes ``logits_per_image``.

    Returns:
        The average of the diagonal of ``logits_per_image`` as a Python float,
        i.e. only the (image i, text i) pairs contribute.
    """
    inputs = clip_processor(text=texts, images=images, return_tensors="pt", padding=True)
    # Scoring is inference only and the result is reduced to a plain float,
    # so there is no reason to record an autograd graph for the forward pass.
    with torch.no_grad():
        outputs = clip_model(**inputs)
    return outputs.logits_per_image.diag().mean().item()

# Function to analyze layer sensitivity
def analyze_layer_sensitivity(model, prompts, bits, sample_size=100):
    """Measure how much quantizing each weight tensor on its own hurts the output.

    For every parameter whose name contains ``"weight"`` and every bit-width
    in ``bits``, a copy of the model is quantized at that single layer, images
    are generated, and they are compared against the unquantized model's
    images.

    Args:
        model: The model to analyze.
        prompts: Text prompts used for generation and CLIP scoring.
        bits: Iterable of bit-widths to try.
        sample_size: Forwarded to ``generate_images``.

    Returns:
        ``{parameter_name: {bit_width: {"mse", "clip_score", "parameters"}}}``.
        Each layer also carries an entry under key ``32`` describing the
        unquantized reference, which ``determine_mixed_precision`` reads.
    """
    full_precision = 32

    clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
    clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")

    # The reference does not depend on the layer or bit-width under test, so
    # it is produced once instead of once per (layer, bit-width) pair.
    reference_images = generate_images(model, prompts, sample_size)
    reference_clip = calculate_clip_score(reference_images, prompts, clip_processor, clip_model)

    results = {}
    for name, param in model.named_parameters():
        if "weight" not in name:
            continue
        layer_results = {
            full_precision: {
                "mse": 0.0,
                "clip_score": reference_clip,
                "parameters": param.numel(),
            },
        }
        for b in bits:
            # quantize_layer modifies the model it is given, so use a
            # throwaway copy.
            quantized_model = copy.deepcopy(model)
            quantize_layer(quantized_model, name, b)
            generated_images = generate_images(quantized_model, prompts, sample_size)
            layer_results[b] = {
                "mse": calculate_mse(generated_images, reference_images),
                "clip_score": calculate_clip_score(generated_images, prompts, clip_processor, clip_model),
                "parameters": param.numel(),
            }
        results[name] = layer_results
    return results

# Function to quantize layer using Lloyd-Max from SciPy
def quantize_layer(model, layer_name, bits):
    """Quantize one layer's weights in place with a k-means (Lloyd-Max style) codebook.

    Every weight is replaced by the nearest of at most ``2**bits`` centroids
    found by SciPy's ``kmeans2``. The result is written back in the weight's
    original dtype and on its original device, so the layer keeps its regular
    ``forward`` and the model stays directly usable for inference. Centroids
    are initialised from randomly chosen weights, so results vary between
    runs.

    Args:
        model: PyTorch module that contains the layer.
        layer_name: Name of the layer as listed by ``model.named_modules()``,
            or the name of its weight parameter as listed by
            ``model.named_parameters()`` (for example ``"conv_in.weight"``).
        bits: Codebook size in bits; must be at least 1.

    Raises:
        KeyError: If no module matches ``layer_name``.
        ValueError: If ``bits`` is smaller than 1.

    Layers without a floating-point ``weight`` are reported and left
    untouched. ``quantization_scale`` and ``quantization_zero_point`` are
    stored on the layer as metadata describing a uniform grid over the
    original weight range. They are not applied in ``forward``: the stored
    weights are already the reconstructed values.
    """
    if bits < 1:
        raise ValueError(f"bits must be >= 1, got {bits}")

    # Callers iterate named_parameters() ("block.conv.weight"), while modules
    # are keyed without the parameter suffix ("block.conv"); accept both.
    modules = dict(model.named_modules())
    module_name = layer_name
    if module_name not in modules:
        owner, _, leaf = layer_name.rpartition(".")
        if leaf == "weight":
            module_name = owner
    layer = modules[module_name]

    weight = getattr(layer, "weight", None)
    if weight is None:
        print(f"Layer {layer_name} does not have weights.")
        return

    if not weight.is_floating_point():
        print(f"Layer {layer_name} already has integer weights.")
        return

    if weight.numel() == 0:
        return

    # float32 on the CPU: NumPy has no bfloat16, and half precision is a poor
    # working type for the clustering.
    values = weight.detach().float().cpu().numpy()
    samples = values.reshape(-1, 1)

    # 'points' initialisation draws distinct samples, so the number of
    # clusters cannot exceed the number of weights.
    clusters = min(2 ** bits, samples.shape[0])
    centroids, labels = kmeans2(samples, clusters, minit='points')
    quantized_w = centroids[labels].reshape(values.shape)

    with torch.no_grad():
        weight.copy_(torch.from_numpy(quantized_w).to(device=weight.device, dtype=weight.dtype))

    min_val = float(values.min())
    max_val = float(values.max())
    # A constant tensor has zero range; fall back to a unit scale so the zero
    # point stays finite.
    scale = (max_val - min_val) / (2 ** bits - 1) if max_val > min_val else 1.0
    layer.quantization_scale = torch.tensor(scale, device=weight.device)
    layer.quantization_zero_point = torch.tensor(float(round(-min_val / scale)), device=weight.device)

# Function to generate images
def generate_images(model, prompts, sample_size):
    """Generate images with the Stable Diffusion model (not implemented yet).

    Args:
        model: The model to generate with.
        prompts: Text prompts to render.
        sample_size: Requested number of samples.

    Raises:
        NotImplementedError: Always. The generation step still has to be
            written with the diffusers pipeline, using ``model`` and
            ``prompts`` as input. Failing loudly here is preferable to
            returning ``None`` and letting the metric functions fail later
            with an unrelated error.
    """
    # TODO: implement the image generation process.
    raise NotImplementedError(
        "generate_images is a placeholder: implement image generation with the "
        "diffusers pipeline before running the sensitivity analysis."
    )

# Function to determine mixed-precision strategy
def determine_mixed_precision(results, sensitivity_threshold, size_factor, clip_thresholds):
    """Choose a bit-width for every analyzed layer.

    Args:
        results: ``{layer_name: {bit_width: {"mse", "clip_score", "parameters"}}}``.
            The entry under key ``32``, when present, is the unquantized
            reference and is never a candidate bit-width.
        sensitivity_threshold: Upper bound for an acceptable sensitivity score.
        size_factor: Exponent applied to the parameter count in the score
            ``mse / parameters ** size_factor``.
        clip_thresholds: Quantile levels used for the CLIP-score adjustment.

    Returns:
        ``{layer_name: bit_width}``. Layers without any candidate bit-width
        are omitted.

    The candidate bit-widths are taken from ``results`` itself rather than
    from a module-level variable. When no reference entry exists for a layer,
    the CLIP-score adjustment is skipped for it instead of raising
    ``KeyError``.
    """
    full_precision = 32

    # The reference scores are the same for every layer being processed, so
    # the quantile cut-offs are computed once up front.
    reference_scores = [
        layer[full_precision]["clip_score"]
        for layer in results.values()
        if full_precision in layer
    ]
    cutoffs = (
        [np.quantile(reference_scores, level) for level in clip_thresholds]
        if reference_scores
        else []
    )

    mixed_precision = {}
    for name, layer_results in results.items():
        candidate_bits = sorted(b for b in layer_results if b != full_precision)
        if not candidate_bits:
            continue
        widest = candidate_bits[-1]

        sensitivity_scores = {
            b: layer_results[b]["mse"] / (layer_results[b]["parameters"] ** size_factor)
            for b in candidate_bits
        }
        optimal_bits = min(candidate_bits, key=lambda b: sensitivity_scores[b])
        if sensitivity_scores[optimal_bits] > sensitivity_threshold:
            optimal_bits = widest  # Use maximum bit-width if exceeding threshold

        if full_precision in layer_results:
            # NOTE(review): kept exactly as written. The "drop" is quantized
            # minus reference and is compared against quantiles of the
            # absolute reference scores; confirm that this is the intended
            # criterion.
            clip_score_drop = (
                layer_results[widest]["clip_score"]
                - layer_results[full_precision]["clip_score"]
            )
            for i, cutoff in enumerate(cutoffs):
                if clip_score_drop > cutoff:
                    optimal_bits += i + 1

        mixed_precision[name] = optimal_bits
    return mixed_precision

# Function to quantize the model
def quantize_model(model, mixed_precision):
    """Quantize the planned weights of ``model`` and return the same model.

    Args:
        model: Module whose parameters are walked via ``named_parameters()``.
        mixed_precision: Mapping from parameter name to the bit-width to use.

    Returns:
        ``model`` itself, after ``quantize_layer`` has been called for every
        parameter whose name contains ``"weight"`` and appears in the mapping.
    """
    for param_name, _ in model.named_parameters():
        if "weight" not in param_name or param_name not in mixed_precision:
            continue
        quantize_layer(model, param_name, mixed_precision[param_name])
    return model

# Function for pre-computation and caching of time embeddings
def precompute_time_embeddings(model, time_steps):
    """Compute and cache the time embedding for timesteps ``0 .. time_steps - 1``.

    Args:
        model: Object exposing ``time_embedding`` and, optionally, ``time_proj``.
        time_steps: Number of consecutive timesteps to precompute.

    Returns:
        ``{t: embedding}`` for every ``t`` in ``range(time_steps)``.

    When the model has a ``time_proj`` (as diffusers' ``UNet2DConditionModel``
    does), the timestep is projected first and cast to the embedding's dtype,
    because ``time_embedding`` consumes the projected features rather than the
    raw integer timestep. Models without ``time_proj`` are called as before.
    """
    embed = model.time_embedding
    project = getattr(model, "time_proj", None)
    # Borrow device and dtype from the embedding's own parameters when it has any.
    reference = next(iter(embed.parameters()), None) if hasattr(embed, "parameters") else None

    time_embeddings = {}
    # The cache holds constants for inference; no autograd graph is needed.
    with torch.no_grad():
        for t in range(time_steps):
            timestep = torch.tensor([t])
            if reference is not None:
                timestep = timestep.to(reference.device)
            if project is not None:
                timestep = project(timestep)
                if reference is not None:
                    timestep = timestep.to(reference.dtype)
            time_embeddings[t] = embed(timestep)
    return time_embeddings

# Load Stable Diffusion model
# `from_pretrained` expects a model directory or hub id; a single checkpoint
# file (as in the default `model_path`) is loaded with `from_single_file`.
if model_path.endswith((".safetensors", ".ckpt")):
    pipe = StableDiffusionPipeline.from_single_file(model_path, torch_dtype=torch.float16)
else:
    pipe = StableDiffusionPipeline.from_pretrained(model_path, torch_dtype=torch.float16)
model = pipe.unet

# Layer sensitivity analysis (implement generate_images first)
prompts = ["A photo of a cat"]  # Replace with your desired prompts
results = analyze_layer_sensitivity(model, prompts, bits)

# Determine mixed-precision strategy
mixed_precision = determine_mixed_precision(
    results, sensitivity_threshold, size_factor, clip_thresholds
)

# Apply quantization to the model
quantized_model = quantize_model(model, mixed_precision)

# Pre-computation and caching of time embeddings
time_embeddings = precompute_time_embeddings(quantized_model, time_steps)

# Save the quantized model.
# NOTE(review): this statement was garbled in the source and is reconstructed.
# torch.save writes a pickle-based checkpoint regardless of the file extension
# in `output_path`; confirm the intended output format.
torch.save(quantized_model.state_dict(), output_path)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
None yet
None yet

No branches or pull requests

4 participants