Update Hugging Face Transformers

This commit is contained in:
Timothy Kassis
2025-10-21 10:30:38 -07:00
parent 1a9149b089
commit 11da596765
12 changed files with 2328 additions and 3148 deletions


@@ -1,105 +1,132 @@
#!/usr/bin/env python3
"""
Quick inference script using Transformers pipelines.
Quick inference using Transformers pipelines.
This script demonstrates how to use various pipeline tasks for quick inference
without manually managing models, tokenizers, or preprocessing.
Usage:
python quick_inference.py --task text-generation --model gpt2 --input "Hello world"
python quick_inference.py --task sentiment-analysis --input "I love this!"
This script demonstrates how to quickly use pre-trained models for inference
across various tasks using the pipeline API.
"""

from transformers import pipeline
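
# Each pipeline() below falls back to a default checkpoint for its task and
# downloads it on first use. To pin a specific model or device, pass them
# explicitly; a sketch (the checkpoint name here is just an example):
#
#   classifier = pipeline(
#       "text-classification",
#       model="distilbert-base-uncased-finetuned-sst-2-english",
#       device=0,  # index of the first GPU; omit (or use "cpu") to run on CPU
#   )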
def text_classification_example():
"""Sentiment analysis example."""
print("=== Text Classification ===")
classifier = pipeline("text-classification")
result = classifier("I love using Transformers! It makes NLP so easy.")
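    # Pipelines also accept a list of strings and return one dict per input,
    # e.g. classifier(["Great product!", "Awful service."]).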
print(f"Result: {result}\n")
def named_entity_recognition_example():
"""Named Entity Recognition example."""
print("=== Named Entity Recognition ===")
ner = pipeline("token-classification", aggregation_strategy="simple")
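    # aggregation_strategy="simple" merges sub-word tokens into whole-entity
    # spans; "none", "first", "average", and "max" are the other strategies.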
text = "My name is Sarah and I work at Microsoft in Seattle"
entities = ner(text)
for entity in entities:
print(f"{entity['word']}: {entity['entity_group']} (score: {entity['score']:.3f})")
print()
def question_answering_example():
"""Question Answering example."""
print("=== Question Answering ===")
qa = pipeline("question-answering")
context = "Paris is the capital and most populous city of France. It is located in northern France."
question = "What is the capital of France?"
answer = qa(question=question, context=context)
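    # Besides 'answer' and 'score', the result carries 'start'/'end' character
    # offsets of the answer span within the context.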
print(f"Question: {question}")
print(f"Answer: {answer['answer']} (score: {answer['score']:.3f})\n")
def text_generation_example():
"""Text generation example."""
print("=== Text Generation ===")
generator = pipeline("text-generation", model="gpt2")
prompt = "Once upon a time in a land far away"
generated = generator(prompt, max_length=50, num_return_sequences=1)
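    # Note that max_length counts the prompt tokens as well; use max_new_tokens
    # instead to bound only the newly generated text.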
print(f"Prompt: {prompt}")
print(f"Generated: {generated[0]['generated_text']}\n")
def summarization_example():
"""Text summarization example."""
print("=== Summarization ===")
summarizer = pipeline("summarization")
article = """
The Transformers library provides thousands of pretrained models to perform tasks
on texts such as classification, information extraction, question answering,
summarization, translation, text generation, etc in over 100 languages. Its aim
is to make cutting-edge NLP easier to use for everyone. The library provides APIs
to quickly download and use pretrained models on a given text, fine-tune them on
your own datasets then share them with the community on the model hub.
"""
summary = summarizer(article, max_length=50, min_length=25, do_sample=False)
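    # do_sample=False keeps decoding deterministic (greedy/beam search), so
    # repeated runs produce the same summary.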
print(f"Summary: {summary[0]['summary_text']}\n")
def translation_example():
"""Translation example."""
print("=== Translation ===")
translator = pipeline("translation_en_to_fr")
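    # Other pairs follow the same naming pattern (e.g. "translation_en_to_de"),
    # or pass a dedicated checkpoint such as model="Helsinki-NLP/opus-mt-en-fr".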
text = "Hello, how are you today?"
translation = translator(text)
print(f"English: {text}")
print(f"French: {translation[0]['translation_text']}\n")
def zero_shot_classification_example():
"""Zero-shot classification example."""
print("=== Zero-Shot Classification ===")
classifier = pipeline("zero-shot-classification")
text = "This is a breaking news story about a major earthquake."
candidate_labels = ["politics", "sports", "science", "breaking news"]
result = classifier(text, candidate_labels)
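    # By default exactly one label is assumed to apply; pass multi_label=True
    # to score each candidate label independently instead.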
print(f"Text: {text}")
print("Predictions:")
for label, score in zip(result['labels'], result['scores']):
print(f" {label}: {score:.3f}")
print()
def image_classification_example():
"""Image classification example (requires PIL)."""
print("=== Image Classification ===")
try:
from PIL import Image
import requests
classifier = pipeline("image-classification")
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)
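        # Loading via PIL is optional here: the classifier also accepts a URL
        # or a local file path directly.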
predictions = classifier(image)
print("Top predictions:")
for pred in predictions[:3]:
print(f" {pred['label']}: {pred['score']:.3f}")
print()
except ImportError:
print("PIL not installed. Skipping image classification example.\n")
def main():
"""Run all examples."""
print("Transformers Quick Inference Examples")
print("=" * 50 + "\n")
# Text tasks
text_classification_example()
named_entity_recognition_example()
question_answering_example()
text_generation_example()
summarization_example()
translation_example()
zero_shot_classification_example()
# Vision task (optional)
image_classification_example()
print(f"Using device: {device}")
print(f"Task: {args.task}")
print(f"Model: {args.model or 'default'}")
print("-" * 50)
# Create pipeline
pipe = pipeline(
args.task,
model=args.model,
device=device,
)
# Run inference based on task
if args.task == "question-answering":
if args.context is None:
print("Error: --context required for question-answering")
return
result = pipe(question=args.input, context=args.context)
print(f"Question: {args.input}")
print(f"Context: {args.context}")
print(f"\nAnswer: {result['answer']}")
print(f"Score: {result['score']:.4f}")
elif args.task == "text-generation":
result = pipe(args.input, max_length=args.max_length)
print(f"Prompt: {args.input}")
print(f"\nGenerated: {result[0]['generated_text']}")
elif args.task in ["sentiment-analysis", "text-classification"]:
result = pipe(args.input)
print(f"Text: {args.input}")
print(f"\nLabel: {result[0]['label']}")
print(f"Score: {result[0]['score']:.4f}")
else:
# Generic handling for other tasks
result = pipe(args.input)
print(f"Input: {args.input}")
print(f"\nResult: {result}")
print("=" * 50)
print("All examples completed!")
if __name__ == "__main__":
    main()