Update Hugging Face Transformers

Timothy Kassis
2025-10-21 10:30:38 -07:00
parent 1a9149b089
commit 11da596765
12 changed files with 2328 additions and 3148 deletions

scientific-packages/transformers/scripts/generate_text.py | 309 (Executable file → Normal file)

@@ -1,231 +1,188 @@
#!/usr/bin/env python3
"""
Text generation with different decoding strategies.

This script demonstrates various text generation approaches:
- Greedy decoding
- Beam search
- Sampling with temperature
- Top-k and top-p (nucleus) sampling
"""

from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

def load_model_and_tokenizer(model_name="gpt2"):
    """
    Load model and tokenizer.

    Args:
        model_name: Name of the model to load

    Returns:
        model, tokenizer
    """
    print(f"Loading model: {model_name}")
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)

    # Set pad token if not already set
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    return model, tokenizer
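
# Note: GPT-2 ships without a dedicated padding token, so reusing the EOS token
# as pad (above) is the standard workaround; it matters once inputs are batched.
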
def generate_with_greedy(model, tokenizer, prompt, max_new_tokens=50):
    """Greedy decoding - always picks the highest-probability token."""
    print("\n=== Greedy Decoding ===")
    print(f"Prompt: {prompt}")

    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        do_sample=False,
        num_beams=1,
        pad_token_id=tokenizer.pad_token_id,
    )

    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print(f"Generated: {generated_text}\n")
def generate_with_beam_search(model, tokenizer, prompt, max_new_tokens=50, num_beams=5):
    """Beam search - explores multiple hypotheses in parallel."""
    print("\n=== Beam Search ===")
    print(f"Prompt: {prompt}")

    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        num_beams=num_beams,
        early_stopping=True,
        no_repeat_ngram_size=2,
        pad_token_id=tokenizer.pad_token_id,
    )

    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print(f"Generated: {generated_text}\n")
def generate_with_sampling(model, tokenizer, prompt, max_new_tokens=50,
                           temperature=0.7, top_k=50, top_p=0.9):
    """Sampling with temperature, top-k, and nucleus (top-p) filtering."""
    print("\n=== Sampling (Temperature + Top-K + Top-P) ===")
    print(f"Prompt: {prompt}")
    print(f"Parameters: temperature={temperature}, top_k={top_k}, top_p={top_p}")

    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
        pad_token_id=tokenizer.pad_token_id,
    )

    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print(f"Generated: {generated_text}\n")
def generate_multiple_sequences(model, tokenizer, prompt, max_new_tokens=50,
                                num_return_sequences=3):
    """Generate multiple diverse sequences."""
    print("\n=== Multiple Sequences (with Sampling) ===")
    print(f"Prompt: {prompt}")
    print(f"Generating {num_return_sequences} sequences...")

    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=0.8,
        top_p=0.95,
        num_return_sequences=num_return_sequences,
        pad_token_id=tokenizer.pad_token_id,
    )

    for i, output in enumerate(outputs):
        generated_text = tokenizer.decode(output, skip_special_tokens=True)
        print(f"\nSequence {i+1}: {generated_text}")
    print()
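
# num_return_sequences > 1 needs do_sample=True here (or beam search with
# num_beams >= num_return_sequences); each row of `outputs` is an independent
# sample conditioned on the same prompt.
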
def generate_with_config(model, tokenizer, prompt):
    """Use GenerationConfig for reusable configuration."""
    print("\n=== Using GenerationConfig ===")
    print(f"Prompt: {prompt}")

    # Create a generation config that bundles all decoding settings
    generation_config = GenerationConfig(
        max_new_tokens=50,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        top_k=50,
        repetition_penalty=1.2,
        no_repeat_ngram_size=3,
        pad_token_id=tokenizer.pad_token_id,
    )

    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(**inputs, generation_config=generation_config)

    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print(f"Generated: {generated_text}\n")
def compare_temperatures(model, tokenizer, prompt, max_new_tokens=50):
    """Compare different temperature settings."""
    print("\n=== Temperature Comparison ===")
    print(f"Prompt: {prompt}\n")

    temperatures = [0.3, 0.7, 1.0, 1.5]
    for temp in temperatures:
        inputs = tokenizer(prompt, return_tensors="pt")
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=temp,
            top_p=0.9,
            pad_token_id=tokenizer.pad_token_id,
        )
        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        print(f"Temperature {temp}: {generated_text}\n")
def main():
    """Run all generation examples."""
    print("=" * 70)
    print("Text Generation Examples")
    print("=" * 70)

    # Load model and tokenizer
    print("\nLoading model...")
    model, tokenizer = load_model_and_tokenizer("gpt2")

    # Example prompts: open-ended for sampling, factual for beam search
    story_prompt = "Once upon a time in a distant galaxy"
    factual_prompt = "The three branches of the US government are"

    # Demonstrate different strategies
    generate_with_greedy(model, tokenizer, story_prompt)
    generate_with_beam_search(model, tokenizer, factual_prompt)
    generate_with_sampling(model, tokenizer, story_prompt)
    generate_multiple_sequences(model, tokenizer, story_prompt, num_return_sequences=3)
    generate_with_config(model, tokenizer, story_prompt)
    compare_temperatures(model, tokenizer, story_prompt)

    print("=" * 70)
    print("All generation examples completed!")
    print("=" * 70)


if __name__ == "__main__":
    main()
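
As a quick smoke test, the helpers can also be driven from an interactive session instead of main(). A minimal sketch, assuming the script is importable as generate_text (the prompt string below is illustrative):

from generate_text import load_model_and_tokenizer, generate_with_sampling

# Load the same small GPT-2 checkpoint the script uses by default.
model, tokenizer = load_model_and_tokenizer("gpt2")

# Sample a short continuation for a custom prompt.
generate_with_sampling(model, tokenizer, "The future of AI", max_new_tokens=40)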