Update Huggingface Transformer

2026-03-28 07:33:45 +08:00 · 2025-10-21 10:30:38 -07:00
parent 1a9149b089
commit 11da596765
12 changed files with 2328 additions and 3148 deletions
--- a/scientific-packages/transformers/scripts/fine_tune_classifier.py
+++ b/scientific-packages/transformers/scripts/fine_tune_classifier.py
@@ -1,19 +1,12 @@
 #!/usr/bin/env python3
 """
-Complete example for fine-tuning a text classification model.
+Fine-tune a transformer model for text classification.

-This script demonstrates the full workflow:
-1. Load dataset
-2. Preprocess with tokenizer
-3. Configure model
-4. Train with Trainer
-5. Evaluate and save
-
-Usage:
-    python fine_tune_classifier.py --model bert-base-uncased --dataset imdb --epochs 3
+This script demonstrates the complete workflow for fine-tuning a pre-trained
+model on a classification task using the Trainer API.
 """

-import argparse
+import numpy as np
 from datasets import load_dataset
 from transformers import (
    AutoTokenizer,
@@ -23,189 +16,225 @@ from transformers import (
    DataCollatorWithPadding,
 )
 import evaluate
-import numpy as np


-def compute_metrics(eval_pred):
-    """Compute accuracy and F1 score."""
-    metric_accuracy = evaluate.load("accuracy")
-    metric_f1 = evaluate.load("f1")
+def load_and_prepare_data(dataset_name="imdb", model_name="distilbert-base-uncased", max_samples=None):
+    """
+    Load a dataset, optionally subsample it, and tokenize its "text" column.

-    logits, labels = eval_pred
-    predictions = np.argmax(logits, axis=-1)
+    Args:
+        dataset_name: Name of the dataset to load
+        model_name: Name of the model/tokenizer to use
+        max_samples: Cap on train/test samples (for quick testing), clamped to each split's size

-    accuracy = metric_accuracy.compute(predictions=predictions, references=labels)
-    f1 = metric_f1.compute(predictions=predictions, references=labels)
+    Returns:
+        tokenized_datasets, tokenizer
+    """
+    print(f"Loading dataset: {dataset_name}")
+    dataset = load_dataset(dataset_name)

-    return {"accuracy": accuracy["accuracy"], "f1": f1["f1"]}
+    # Optionally limit samples for quick testing; clamp so max_samples may exceed a split
+    if max_samples:
+        dataset["train"] = dataset["train"].select(range(min(max_samples, len(dataset["train"]))))
+        dataset["test"] = dataset["test"].select(range(min(max_samples, len(dataset["test"]))))
+
+    print(f"Loading tokenizer: {model_name}")
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+    def tokenize_function(examples):
+        return tokenizer(
+            examples["text"],
+            padding="max_length",  # NOTE(review): pads every row to max_length before batching
+            truncation=True,
+            max_length=512
+        )
+
+    print("Tokenizing dataset...")
+    tokenized_datasets = dataset.map(tokenize_function, batched=True)
+
+    return tokenized_datasets, tokenizer


-def main():
-    parser = argparse.ArgumentParser(description="Fine-tune a text classification model")
-    parser.add_argument(
-        "--model",
-        type=str,
-        default="bert-base-uncased",
-        help="Pretrained model name or path",
-    )
-    parser.add_argument(
-        "--dataset",
-        type=str,
-        default="imdb",
-        help="Dataset name from Hugging Face Hub",
-    )
-    parser.add_argument(
-        "--max-samples",
-        type=int,
-        default=None,
-        help="Maximum samples to use (for quick testing)",
-    )
-    parser.add_argument(
-        "--output-dir",
-        type=str,
-        default="./results",
-        help="Output directory for checkpoints",
-    )
-    parser.add_argument(
-        "--epochs",
-        type=int,
-        default=3,
-        help="Number of training epochs",
-    )
-    parser.add_argument(
-        "--batch-size",
-        type=int,
-        default=16,
-        help="Batch size per device",
-    )
-    parser.add_argument(
-        "--learning-rate",
-        type=float,
-        default=2e-5,
-        help="Learning rate",
-    )
-    parser.add_argument(
-        "--push-to-hub",
-        action="store_true",
-        help="Push model to Hugging Face Hub after training",
-    )
+def create_model(model_name, num_labels, id2label, label2id):
+    """
+    Create classification model.

-    args = parser.parse_args()
-
-    print("=" * 60)
-    print("Text Classification Fine-Tuning")
-    print("=" * 60)
-    print(f"Model: {args.model}")
-    print(f"Dataset: {args.dataset}")
-    print(f"Epochs: {args.epochs}")
-    print(f"Batch size: {args.batch_size}")
-    print(f"Learning rate: {args.learning_rate}")
-    print("=" * 60)
-
-    # 1. Load dataset
-    print("\n[1/5] Loading dataset...")
-    dataset = load_dataset(args.dataset)
-
-    if args.max_samples:
-        dataset["train"] = dataset["train"].select(range(args.max_samples))
-        dataset["test"] = dataset["test"].select(range(args.max_samples // 5))
-
-    print(f"Train samples: {len(dataset['train'])}")
-    print(f"Test samples: {len(dataset['test'])}")
-
-    # 2. Preprocess
-    print("\n[2/5] Preprocessing data...")
-    tokenizer = AutoTokenizer.from_pretrained(args.model)
-
-    def preprocess_function(examples):
-        return tokenizer(examples["text"], truncation=True, max_length=512)
-
-    tokenized_dataset = dataset.map(preprocess_function, batched=True)
-    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
-
-    # 3. Load model
-    print("\n[3/5] Loading model...")
-
-    # Determine number of labels
-    num_labels = len(set(dataset["train"]["label"]))
+    Args:
+        model_name: Name of the pre-trained model
+        num_labels: Number of classification labels
+        id2label: Dictionary mapping label IDs to names
+        label2id: Dictionary mapping label names to IDs

+    Returns:
+        model
+    """
+    print(f"Loading model: {model_name}")
    model = AutoModelForSequenceClassification.from_pretrained(
-        args.model,
+        model_name,
        num_labels=num_labels,
+        id2label=id2label,
+        label2id=label2id
    )
+    return model

-    print(f"Number of labels: {num_labels}")
-    print(f"Model parameters: {model.num_parameters():,}")

-    # 4. Configure training
-    print("\n[4/5] Configuring training...")
+def define_compute_metrics(metric_name="accuracy"):
+    """
+    Build the metrics callback that is handed to Trainer as compute_metrics.
+
+    Args:
+        metric_name: Name passed to evaluate.load() (default "accuracy")
+
+    Returns:
+        compute_metrics function taking (logits, labels) and returning metric.compute()'s result
+    """
+    metric = evaluate.load(metric_name)  # loaded once here, reused by every evaluation call
+
+    def compute_metrics(eval_pred):
+        logits, labels = eval_pred
+        predictions = np.argmax(logits, axis=-1)  # predicted class = index of the largest logit
+        return metric.compute(predictions=predictions, references=labels)
+
+    return compute_metrics
+
+
+def train_model(model, tokenizer, train_dataset, eval_dataset, output_dir="./results"):
+    """
+    Train the model.
+
+    Args:
+        model: The model to train
+        tokenizer: The tokenizer
+        train_dataset: Training dataset
+        eval_dataset: Evaluation dataset
+        output_dir: Directory for checkpoints and logs
+
+    Returns:
+        trained model, trainer
+    """
+    # Define training arguments
    training_args = TrainingArguments(
-        output_dir=args.output_dir,
-        learning_rate=args.learning_rate,
-        per_device_train_batch_size=args.batch_size,
-        per_device_eval_batch_size=args.batch_size,
-        num_train_epochs=args.epochs,
+        output_dir=output_dir,
+        num_train_epochs=3,
+        per_device_train_batch_size=16,
+        per_device_eval_batch_size=64,
+        learning_rate=2e-5,
        weight_decay=0.01,
        eval_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True,
-        push_to_hub=args.push_to_hub,
+        metric_for_best_model="accuracy",
+        logging_dir=f"{output_dir}/logs",
        logging_steps=100,
+        save_total_limit=2,
+        fp16=False,  # Set to True if using GPU with fp16 support
    )

+    # Create data collator
+    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
+
+    # Create trainer
    trainer = Trainer(
        model=model,
        args=training_args,
-        train_dataset=tokenized_dataset["train"],
-        eval_dataset=tokenized_dataset["test"],
-        tokenizer=tokenizer,
+        train_dataset=train_dataset,
+        eval_dataset=eval_dataset,
        data_collator=data_collator,
-        compute_metrics=compute_metrics,
+        compute_metrics=define_compute_metrics("accuracy"),
    )

-    # 5. Train
-    print("\n[5/5] Training...")
-    print("-" * 60)
+    # Train
+    print("\nStarting training...")
    trainer.train()

    # Evaluate
-    print("\n" + "=" * 60)
-    print("Final Evaluation")
-    print("=" * 60)
-    metrics = trainer.evaluate()
+    print("\nEvaluating model...")
+    eval_results = trainer.evaluate()
+    print(f"Evaluation results: {eval_results}")

-    print(f"Accuracy: {metrics['eval_accuracy']:.4f}")
-    print(f"F1 Score: {metrics['eval_f1']:.4f}")
-    print(f"Loss: {metrics['eval_loss']:.4f}")
+    return model, trainer

-    # Save
-    print("\n" + "=" * 60)
-    print(f"Saving model to {args.output_dir}")
-    trainer.save_model(args.output_dir)
-    tokenizer.save_pretrained(args.output_dir)

-    if args.push_to_hub:
-        print("Pushing to Hugging Face Hub...")
-        trainer.push_to_hub()
+def test_inference(model, tokenizer, id2label):
+    """
+    Run the trained model on a few sample texts and print the predictions.
+
+    Args:
+        model: Trained sequence classification model
+        tokenizer: Tokenizer matching the model
+        id2label: Dictionary mapping label IDs to names
+    """
+    import torch  # local import: this script has no module-level torch import
+    print("\n=== Testing Inference ===")
+    test_texts = [
+        "This movie was absolutely fantastic! I loved every minute of it.",
+        "Terrible film. Waste of time and money.",
+        "It was okay, nothing special but not bad either."
+    ]
+    model.eval()  # inference mode: disable dropout
+    for text in test_texts:
+        inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
+        inputs = inputs.to(model.device)  # the model may have been moved off the CPU by training
+        with torch.no_grad():  # no gradients are needed for prediction
+            probs = model(**inputs).logits.softmax(-1)
+        confidence, predicted_id = probs.max(-1)
+        predicted_label = id2label[predicted_id.item()]
+        print(f"\nText: {text}")
+        print(f"Prediction: {predicted_label} (confidence: {confidence.item():.3f})")
+
+
+def main():
+    """Main training pipeline."""
+    # Configuration
+    DATASET_NAME = "imdb"
+    MODEL_NAME = "distilbert-base-uncased"
+    OUTPUT_DIR = "./results"
+    MAX_SAMPLES = None  # Set to a small number (e.g., 1000) for quick testing
+
+    # Label mapping
+    id2label = {0: "negative", 1: "positive"}
+    label2id = {"negative": 0, "positive": 1}
+    num_labels = len(id2label)

    print("=" * 60)
-    print("Training complete!")
+    print("Fine-Tuning Text Classification Model")
    print("=" * 60)

-    # Quick inference example
-    print("\nQuick inference example:")
-    from transformers import pipeline
-
-    classifier = pipeline(
-        "text-classification",
-        model=args.output_dir,
-        tokenizer=args.output_dir,
+    # Load and prepare data
+    tokenized_datasets, tokenizer = load_and_prepare_data(
+        dataset_name=DATASET_NAME,
+        model_name=MODEL_NAME,
+        max_samples=MAX_SAMPLES
    )

-    example_text = "This is a great example of how to use transformers!"
-    result = classifier(example_text)
-    print(f"Text: {example_text}")
-    print(f"Prediction: {result[0]['label']} (score: {result[0]['score']:.4f})")
+    # Create model
+    model = create_model(
+        model_name=MODEL_NAME,
+        num_labels=num_labels,
+        id2label=id2label,
+        label2id=label2id
+    )
+
+    # Train model
+    model, trainer = train_model(
+        model=model,
+        tokenizer=tokenizer,
+        train_dataset=tokenized_datasets["train"],
+        eval_dataset=tokenized_datasets["test"],
+        output_dir=OUTPUT_DIR
+    )
+
+    # Save final model
+    print(f"\nSaving model to {OUTPUT_DIR}/final_model")
+    trainer.save_model(f"{OUTPUT_DIR}/final_model")
+    tokenizer.save_pretrained(f"{OUTPUT_DIR}/final_model")
+
+    # Test inference
+    test_inference(model, tokenizer, id2label)
+
+    print("\n" + "=" * 60)
+    print("Training completed successfully!")
+    print("=" * 60)


 if __name__ == "__main__":
--- a/scientific-packages/transformers/scripts/generate_text.py
+++ b/scientific-packages/transformers/scripts/generate_text.py
@@ -1,231 +1,188 @@
 #!/usr/bin/env python3
 """
-Text generation with various strategies.
+Text generation with different decoding strategies.

-This script demonstrates different generation strategies:
- Greedy decoding
- Beam search
- Sampling with temperature
- Top-k and top-p sampling
-
-Usage:
-    python generate_text.py --model gpt2 --prompt "The future of AI" --strategy sampling
+This script demonstrates various text generation approaches using
+different sampling and decoding strategies.
 """

-import argparse
-import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig


-def generate_with_greedy(model, tokenizer, prompt, max_length):
-    """Greedy decoding (deterministic)."""
-    print("\n" + "=" * 60)
-    print("GREEDY DECODING")
-    print("=" * 60)
+def load_model_and_tokenizer(model_name="gpt2"):
+    """
+    Load model and tokenizer.

-    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+    Args:
+        model_name: Name of the model to load

+    Returns:
+        model, tokenizer
+    """
+    print(f"Loading model: {model_name}")
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    model = AutoModelForCausalLM.from_pretrained(model_name)
+
+    # Set pad token if not already set
+    if tokenizer.pad_token is None:
+        tokenizer.pad_token = tokenizer.eos_token
+
+    return model, tokenizer
+
+
+def generate_with_greedy(model, tokenizer, prompt, max_new_tokens=50):
+    """Greedy decoding - always picks highest probability token."""
+    print("\n=== Greedy Decoding ===")
+    print(f"Prompt: {prompt}")
+
+    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(
        **inputs,
-        max_new_tokens=max_length,
-        pad_token_id=tokenizer.eos_token_id,
+        max_new_tokens=max_new_tokens,
+        do_sample=False,
+        num_beams=1,
+        pad_token_id=tokenizer.pad_token_id
    )

-    text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    print(f"\nPrompt: {prompt}")
-    print(f"\nGenerated:\n{text}")
+    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    print(f"Generated: {generated_text}\n")


-def generate_with_beam_search(model, tokenizer, prompt, max_length, num_beams=5):
-    """Beam search for higher quality."""
-    print("\n" + "=" * 60)
-    print(f"BEAM SEARCH (num_beams={num_beams})")
-    print("=" * 60)
-
-    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+def generate_with_beam_search(model, tokenizer, prompt, max_new_tokens=50, num_beams=5):
+    """Beam search - explores multiple hypotheses."""
+    print("\n=== Beam Search ===")
+    print(f"Prompt: {prompt}")

+    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(
        **inputs,
-        max_new_tokens=max_length,
+        max_new_tokens=max_new_tokens,
        num_beams=num_beams,
        early_stopping=True,
        no_repeat_ngram_size=2,
-        pad_token_id=tokenizer.eos_token_id,
+        pad_token_id=tokenizer.pad_token_id
    )

-    text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    print(f"\nPrompt: {prompt}")
-    print(f"\nGenerated:\n{text}")
+    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    print(f"Generated: {generated_text}\n")


-def generate_with_sampling(model, tokenizer, prompt, max_length, temperature=0.8):
-    """Sampling with temperature."""
-    print("\n" + "=" * 60)
-    print(f"SAMPLING (temperature={temperature})")
-    print("=" * 60)
-
-    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+def generate_with_sampling(model, tokenizer, prompt, max_new_tokens=50,
+                           temperature=0.7, top_k=50, top_p=0.9):
+    """Sampling with temperature, top-k, and nucleus (top-p) sampling."""
+    print("\n=== Sampling (Temperature + Top-K + Top-P) ===")
+    print(f"Prompt: {prompt}")
+    print(f"Parameters: temperature={temperature}, top_k={top_k}, top_p={top_p}")

+    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(
        **inputs,
-        max_new_tokens=max_length,
+        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
-        pad_token_id=tokenizer.eos_token_id,
-    )
-
-    text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    print(f"\nPrompt: {prompt}")
-    print(f"\nGenerated:\n{text}")
-
-
-def generate_with_top_k_top_p(model, tokenizer, prompt, max_length, top_k=50, top_p=0.95, temperature=0.8):
-    """Top-k and top-p (nucleus) sampling."""
-    print("\n" + "=" * 60)
-    print(f"TOP-K TOP-P SAMPLING (k={top_k}, p={top_p}, temp={temperature})")
-    print("=" * 60)
-
-    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-
-    outputs = model.generate(
-        **inputs,
-        max_new_tokens=max_length,
-        do_sample=True,
        top_k=top_k,
        top_p=top_p,
-        temperature=temperature,
-        repetition_penalty=1.2,
-        no_repeat_ngram_size=3,
-        pad_token_id=tokenizer.eos_token_id,
+        pad_token_id=tokenizer.pad_token_id
    )

-    text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    print(f"\nPrompt: {prompt}")
-    print(f"\nGenerated:\n{text}")
+    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    print(f"Generated: {generated_text}\n")


-def generate_multiple(model, tokenizer, prompt, max_length, num_sequences=3):
+def generate_multiple_sequences(model, tokenizer, prompt, max_new_tokens=50,
+                                 num_return_sequences=3):
    """Generate multiple diverse sequences."""
-    print("\n" + "=" * 60)
-    print(f"MULTIPLE SEQUENCES (n={num_sequences})")
-    print("=" * 60)
-
-    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+    print("\n=== Multiple Sequences (with Sampling) ===")
+    print(f"Prompt: {prompt}")
+    print(f"Generating {num_return_sequences} sequences...")

+    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(
        **inputs,
-        max_new_tokens=max_length,
+        max_new_tokens=max_new_tokens,
        do_sample=True,
-        num_return_sequences=num_sequences,
-        temperature=0.9,
+        temperature=0.8,
        top_p=0.95,
-        pad_token_id=tokenizer.eos_token_id,
+        num_return_sequences=num_return_sequences,
+        pad_token_id=tokenizer.pad_token_id
    )

-    print(f"\nPrompt: {prompt}\n")
-    for i, output in enumerate(outputs, 1):
-        text = tokenizer.decode(output, skip_special_tokens=True)
-        print(f"\n--- Sequence {i} ---\n{text}\n")
+    for i, output in enumerate(outputs):
+        generated_text = tokenizer.decode(output, skip_special_tokens=True)
+        print(f"\nSequence {i+1}: {generated_text}")
+    print()
+
+
+def generate_with_config(model, tokenizer, prompt, max_new_tokens=50):
+    """Generate from `prompt` using a reusable GenerationConfig object."""
+    print("\n=== Using GenerationConfig ===")
+    print(f"Prompt: {prompt}")
+
+    # Bundle all decoding settings in one object that is passed to generate()
+    generation_config = GenerationConfig(
+        max_new_tokens=max_new_tokens,
+        do_sample=True,
+        temperature=0.7,
+        top_p=0.9,
+        top_k=50,
+        repetition_penalty=1.2,
+        no_repeat_ngram_size=3,
+        pad_token_id=tokenizer.pad_token_id
+    )
+
+    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)  # keep inputs on the model's device
+    outputs = model.generate(**inputs, generation_config=generation_config)
+
+    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    print(f"Generated: {generated_text}\n")
+
+
+def compare_temperatures(model, tokenizer, prompt, max_new_tokens=50):
+    """Sample one continuation of `prompt` per temperature and print each."""
+    print("\n=== Temperature Comparison ===")
+    print(f"Prompt: {prompt}\n")
+
+    temperatures = [0.3, 0.7, 1.0, 1.5]
+    # The prompt is the same for every run, so tokenize it once, on the model's device.
+    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+    for temp in temperatures:
+        outputs = model.generate(
+            **inputs,
+            max_new_tokens=max_new_tokens,
+            do_sample=True,
+            temperature=temp,
+            top_p=0.9,
+            pad_token_id=tokenizer.pad_token_id
+        )
+
+        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        print(f"Temperature {temp}: {generated_text}\n")


 def main():
-    parser = argparse.ArgumentParser(description="Text generation with various strategies")
-    parser.add_argument(
-        "--model",
-        type=str,
-        default="gpt2",
-        help="Model name or path",
-    )
-    parser.add_argument(
-        "--prompt",
-        type=str,
-        required=True,
-        help="Input prompt for generation",
-    )
-    parser.add_argument(
-        "--strategy",
-        type=str,
-        default="all",
-        choices=["greedy", "beam", "sampling", "top_k_top_p", "multiple", "all"],
-        help="Generation strategy to use",
-    )
-    parser.add_argument(
-        "--max-length",
-        type=int,
-        default=100,
-        help="Maximum number of new tokens to generate",
-    )
-    parser.add_argument(
-        "--device",
-        type=str,
-        default="auto",
-        help="Device (cuda, cpu, or auto)",
-    )
-    parser.add_argument(
-        "--temperature",
-        type=float,
-        default=0.8,
-        help="Sampling temperature",
-    )
-    parser.add_argument(
-        "--quantize",
-        action="store_true",
-        help="Use 8-bit quantization",
-    )
-
-    args = parser.parse_args()
-
-    print("=" * 60)
-    print("Text Generation Demo")
-    print("=" * 60)
-    print(f"Model: {args.model}")
-    print(f"Strategy: {args.strategy}")
-    print(f"Max length: {args.max_length}")
-    print(f"Device: {args.device}")
-    print("=" * 60)
+    """Run all generation examples."""
+    print("=" * 70)
+    print("Text Generation Examples")
+    print("=" * 70)

    # Load model and tokenizer
-    print("\nLoading model...")
+    model, tokenizer = load_model_and_tokenizer("gpt2")

-    if args.device == "auto":
-        device_map = "auto"
-        device = None
-    else:
-        device_map = None
-        device = args.device
+    # Example prompts
+    story_prompt = "Once upon a time in a distant galaxy"
+    factual_prompt = "The three branches of the US government are"

-    model_kwargs = {"device_map": device_map} if device_map else {}
+    # Demonstrate different strategies
+    generate_with_greedy(model, tokenizer, story_prompt)
+    generate_with_beam_search(model, tokenizer, factual_prompt)
+    generate_with_sampling(model, tokenizer, story_prompt)
+    generate_multiple_sequences(model, tokenizer, story_prompt, num_return_sequences=3)
+    generate_with_config(model, tokenizer, story_prompt)
+    compare_temperatures(model, tokenizer, story_prompt)

-    if args.quantize:
-        print("Using 8-bit quantization...")
-        model_kwargs["load_in_8bit"] = True
-
-    model = AutoModelForCausalLM.from_pretrained(args.model, **model_kwargs)
-    tokenizer = AutoTokenizer.from_pretrained(args.model)
-
-    if device and not device_map:
-        model = model.to(device)
-
-    print(f"Model loaded on: {model.device if hasattr(model, 'device') else 'multiple devices'}")
-
-    # Generate based on strategy
-    strategies = {
-        "greedy": lambda: generate_with_greedy(model, tokenizer, args.prompt, args.max_length),
-        "beam": lambda: generate_with_beam_search(model, tokenizer, args.prompt, args.max_length),
-        "sampling": lambda: generate_with_sampling(model, tokenizer, args.prompt, args.max_length, args.temperature),
-        "top_k_top_p": lambda: generate_with_top_k_top_p(model, tokenizer, args.prompt, args.max_length),
-        "multiple": lambda: generate_multiple(model, tokenizer, args.prompt, args.max_length),
-    }
-
-    if args.strategy == "all":
-        for strategy_fn in strategies.values():
-            strategy_fn()
-    else:
-        strategies[args.strategy]()
-
-    print("\n" + "=" * 60)
-    print("Generation complete!")
-    print("=" * 60)
+    print("=" * 70)
+    print("All generation examples completed!")
+    print("=" * 70)


 if __name__ == "__main__":
--- a/scientific-packages/transformers/scripts/quick_inference.py
+++ b/scientific-packages/transformers/scripts/quick_inference.py
@@ -1,105 +1,132 @@
 #!/usr/bin/env python3
 """
-Quick inference script using Transformers pipelines.
+Quick inference using Transformers pipelines.

-This script demonstrates how to use various pipeline tasks for quick inference
-without manually managing models, tokenizers, or preprocessing.
-
-Usage:
-    python quick_inference.py --task text-generation --model gpt2 --input "Hello world"
-    python quick_inference.py --task sentiment-analysis --input "I love this!"
+This script demonstrates how to quickly use pre-trained models for inference
+across various tasks using the pipeline API.
 """

-import argparse
-from transformers import pipeline, infer_device
+from transformers import pipeline
+
+
+def text_classification_example():
+    """Run the text-classification pipeline on one sample sentence and print the raw result."""
+    print("=== Text Classification ===")
+    classifier = pipeline("text-classification")  # no model argument is passed here
+    result = classifier("I love using Transformers! It makes NLP so easy.")
+    print(f"Result: {result}\n")
+
+
+def named_entity_recognition_example():
+    """Run token classification on a sample sentence and print each entity's word, group and score."""
+    print("=== Named Entity Recognition ===")
+    ner = pipeline("token-classification", aggregation_strategy="simple")
+    text = "My name is Sarah and I work at Microsoft in Seattle"
+    entities = ner(text)
+    for entity in entities:
+        print(f"{entity['word']}: {entity['entity_group']} (score: {entity['score']:.3f})")
+    print()
+
+
+def question_answering_example():
+    """Ask one question against a short context passage and print the answer with its score."""
+    print("=== Question Answering ===")
+    qa = pipeline("question-answering")
+    context = "Paris is the capital and most populous city of France. It is located in northern France."
+    question = "What is the capital of France?"
+    answer = qa(question=question, context=context)
+    print(f"Question: {question}")
+    print(f"Answer: {answer['answer']} (score: {answer['score']:.3f})\n")
+
+
+def text_generation_example():
+    """Generate a continuation of a fixed prompt with gpt2 and print it."""
+    print("=== Text Generation ===")
+    generator = pipeline("text-generation", model="gpt2")
+    prompt = "Once upon a time in a land far away"
+    generated = generator(prompt, max_new_tokens=50, num_return_sequences=1)  # budget excludes the prompt
+    print(f"Prompt: {prompt}")
+    print(f"Generated: {generated[0]['generated_text']}\n")
+
+
+def summarization_example():
+    """Summarize a short paragraph about the Transformers library and print the summary."""
+    print("=== Summarization ===")
+    summarizer = pipeline("summarization")
+    article = """
+    The Transformers library provides thousands of pretrained models to perform tasks
+    on texts such as classification, information extraction, question answering,
+    summarization, translation, text generation, etc in over 100 languages. Its aim
+    is to make cutting-edge NLP easier to use for everyone. The library provides APIs
+    to quickly download and use pretrained models on a given text, fine-tune them on
+    your own datasets then share them with the community on the model hub.
+    """
+    summary = summarizer(article, max_length=50, min_length=25, do_sample=False)
+    print(f"Summary: {summary[0]['summary_text']}\n")
+
+
+def translation_example():
+    """Translate one English sentence with the translation_en_to_fr pipeline and print both texts."""
+    print("=== Translation ===")
+    translator = pipeline("translation_en_to_fr")
+    text = "Hello, how are you today?"
+    translation = translator(text)
+    print(f"English: {text}")
+    print(f"French: {translation[0]['translation_text']}\n")
+
+
+def zero_shot_classification_example():
+    """Score one sentence against a list of candidate labels and print every label with its score."""
+    print("=== Zero-Shot Classification ===")
+    classifier = pipeline("zero-shot-classification")
+    text = "This is a breaking news story about a major earthquake."
+    candidate_labels = ["politics", "sports", "science", "breaking news"]
+    result = classifier(text, candidate_labels)
+    print(f"Text: {text}")
+    print("Predictions:")
+    for label, score in zip(result['labels'], result['scores']):  # the two lists are read pairwise
+        print(f"  {label}: {score:.3f}")
+    print()
+
+
+def image_classification_example():
+    """Classify a sample image fetched over HTTP (requires PIL and requests)."""
+    print("=== Image Classification ===")
+    try:
+        from PIL import Image
+        import requests
+        classifier = pipeline("image-classification")
+        url = "http://images.cocodataset.org/val2017/000000039769.jpg"
+        response = requests.get(url, stream=True, timeout=30)  # bound the wait on a stalled server
+        response.raise_for_status()  # surface HTTP errors instead of passing an error page to PIL
+        predictions = classifier(Image.open(response.raw))
+        print("Top predictions:")
+        for pred in predictions[:3]:
+            print(f"  {pred['label']}: {pred['score']:.3f}")
+        print()
+    except ImportError:
+        print("PIL or requests not installed. Skipping image classification example.\n")


 def main():
-    parser = argparse.ArgumentParser(description="Quick inference with Transformers pipelines")
-    parser.add_argument(
-        "--task",
-        type=str,
-        required=True,
-        help="Pipeline task (text-generation, sentiment-analysis, question-answering, etc.)",
-    )
-    parser.add_argument(
-        "--model",
-        type=str,
-        default=None,
-        help="Model name or path (default: use task default)",
-    )
-    parser.add_argument(
-        "--input",
-        type=str,
-        required=True,
-        help="Input text for inference",
-    )
-    parser.add_argument(
-        "--context",
-        type=str,
-        default=None,
-        help="Context for question-answering tasks",
-    )
-    parser.add_argument(
-        "--max-length",
-        type=int,
-        default=50,
-        help="Maximum generation length",
-    )
-    parser.add_argument(
-        "--device",
-        type=str,
-        default=None,
-        help="Device (cuda, cpu, or auto-detect)",
-    )
+    """Run all examples."""
+    print("Transformers Quick Inference Examples")
+    print("=" * 50 + "\n")

-    args = parser.parse_args()
+    # Text tasks
+    text_classification_example()
+    named_entity_recognition_example()
+    question_answering_example()
+    text_generation_example()
+    summarization_example()
+    translation_example()
+    zero_shot_classification_example()

-    # Auto-detect device if not specified
-    if args.device is None:
-        device = infer_device()
-    else:
-        device = args.device
+    # Vision task (optional)
+    image_classification_example()

-    print(f"Using device: {device}")
-    print(f"Task: {args.task}")
-    print(f"Model: {args.model or 'default'}")
-    print("-" * 50)
-
-    # Create pipeline
-    pipe = pipeline(
-        args.task,
-        model=args.model,
-        device=device,
-    )
-
-    # Run inference based on task
-    if args.task == "question-answering":
-        if args.context is None:
-            print("Error: --context required for question-answering")
-            return
-        result = pipe(question=args.input, context=args.context)
-        print(f"Question: {args.input}")
-        print(f"Context: {args.context}")
-        print(f"\nAnswer: {result['answer']}")
-        print(f"Score: {result['score']:.4f}")
-
-    elif args.task == "text-generation":
-        result = pipe(args.input, max_length=args.max_length)
-        print(f"Prompt: {args.input}")
-        print(f"\nGenerated: {result[0]['generated_text']}")
-
-    elif args.task in ["sentiment-analysis", "text-classification"]:
-        result = pipe(args.input)
-        print(f"Text: {args.input}")
-        print(f"\nLabel: {result[0]['label']}")
-        print(f"Score: {result[0]['score']:.4f}")
-
-    else:
-        # Generic handling for other tasks
-        result = pipe(args.input)
-        print(f"Input: {args.input}")
-        print(f"\nResult: {result}")
+    print("=" * 50)
+    print("All examples completed!")


 if __name__ == "__main__":