Add GeoMaster: Comprehensive Geospatial Science Skill

- Added SKILL.md with installation, quick start, core concepts, workflows
- Added 12 reference documentation files covering 70+ topics
- Includes 500+ code examples across 7 programming languages
- Covers remote sensing, GIS, ML/AI, 30+ scientific domains
- MIT License

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Co-Authored-By: Dr. Umair Rabbani <umairrs@gmail.com>
This commit is contained in:
urabbani
2026-03-01 13:42:41 +05:00
parent 29c869326e
commit 4787f98d98
14 changed files with 5526 additions and 0 deletions

View File

@@ -0,0 +1,462 @@
# Machine Learning for Geospatial Data
Guide to ML and deep learning applications for remote sensing and spatial analysis.
## Traditional Machine Learning
### Random Forest for Land Cover
```python
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import rasterio
from rasterio.features import rasterize
import geopandas as gpd
import numpy as np
def train_random_forest_classifier(raster_path, training_gdf):
    """Train a Random Forest land-cover classifier from labelled polygons.

    Parameters
    ----------
    raster_path : str
        Path to the multi-band raster to sample training pixels from.
    training_gdf : geopandas.GeoDataFrame
        Training polygons with a ``class_id`` column of integer labels.

    Returns
    -------
    sklearn.ensemble.RandomForestClassifier
        The fitted model (validation report and band importances are printed).
    """
    # BUG FIX: pandas is used below for the importance table but this
    # snippet's import block never brings it in; import it locally.
    import pandas as pd

    # Load imagery as (bands, rows, cols) plus the geotransform needed to
    # rasterize training geometries onto the same grid.
    with rasterio.open(raster_path) as src:
        image = src.read()
        profile = src.profile
        transform = src.transform

    # Extract training samples: burn each polygon to a mask, then collect
    # the per-band pixel values under that mask.
    X, y = [], []
    for _, row in training_gdf.iterrows():
        mask = rasterize(
            [(row.geometry, 1)],
            out_shape=(profile['height'], profile['width']),
            transform=transform,
            fill=0,
            dtype=np.uint8
        )
        pixels = image[:, mask > 0].T  # (n_pixels, n_bands)
        X.extend(pixels)
        y.extend([row['class_id']] * len(pixels))
    X = np.array(X)
    y = np.array(y)

    # Hold out 20% for validation, stratified so rare classes are represented.
    X_train, X_val, y_train, y_val = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    # Train model
    rf = RandomForestClassifier(
        n_estimators=100,
        max_depth=20,
        min_samples_split=10,
        min_samples_leaf=4,
        class_weight='balanced',  # compensate for class imbalance
        n_jobs=-1,
        random_state=42
    )
    rf.fit(X_train, y_train)

    # Validate on the held-out split
    y_pred = rf.predict(X_val)
    print("Classification Report:")
    print(classification_report(y_val, y_pred))

    # Per-band feature importance, most important first
    feature_names = [f'Band_{i}' for i in range(X.shape[1])]
    importances = pd.DataFrame({
        'feature': feature_names,
        'importance': rf.feature_importances_
    }).sort_values('importance', ascending=False)
    print("\nFeature Importance:")
    print(importances)
    return rf
# Classify full image
def classify_image(model, image_path, output_path):
    """Apply a fitted classifier to every pixel of a raster and save the map."""
    with rasterio.open(image_path) as src:
        bands = src.read()
        meta = src.profile
    # Flatten (bands, rows, cols) into a (n_pixels, n_bands) sample matrix.
    n_bands = bands.shape[0]
    samples = bands.reshape(n_bands, -1).T
    labels = model.predict(samples)
    labels = labels.reshape(bands.shape[1], bands.shape[2])
    # Single-band uint8 output on the same grid as the input.
    meta.update(dtype=rasterio.uint8, count=1)
    with rasterio.open(output_path, 'w', **meta) as dst:
        dst.write(labels.astype(rasterio.uint8), 1)
```
### Support Vector Machine
```python
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
def svm_classifier(X_train, y_train):
    """Fit an RBF-kernel SVM on standardized features.

    Returns the fitted classifier together with the scaler so callers can
    apply the identical transform at prediction time.
    """
    # Standardize: SVMs are sensitive to feature scale.
    scaler = StandardScaler()
    scaled = scaler.fit_transform(X_train)
    model = SVC(
        kernel='rbf',
        C=100,
        gamma='scale',
        class_weight='balanced',
        probability=True
    )
    model.fit(scaled, y_train)
    return model, scaler
# Multi-class classification
def multiclass_svm(X_train, y_train):
    """Fit a one-vs-rest RBF SVM; returns (classifier, fitted scaler)."""
    from sklearn.multiclass import OneVsRestClassifier

    scaler = StandardScaler()
    scaled = scaler.fit_transform(X_train)
    # One binary SVM per class, trained in parallel.
    ovr = OneVsRestClassifier(
        SVC(kernel='rbf', C=10, probability=True),
        n_jobs=-1
    )
    ovr.fit(scaled, y_train)
    return ovr, scaler
```
## Deep Learning
### CNN with TorchGeo
```python
import torch
import torch.nn as nn
import torchgeo.datasets as datasets
import torchgeo.models as models
from torch.utils.data import DataLoader
# Define CNN
class LandCoverCNN(nn.Module):
    """Encoder-decoder CNN producing a per-pixel class map.

    Three stride-2 downsampling stages are mirrored by three transposed
    convolutions, so the output height/width match the input (which must
    be divisible by 8). The final layer emits raw class logits.
    """

    def __init__(self, in_channels=12, num_classes=10):
        super().__init__()
        # Encoder: conv -> BN -> ReLU -> pool, channel width doubling
        # at each stage (same layer order as a hand-written Sequential).
        stages = []
        for c_in, c_out in ((in_channels, 64), (64, 128), (128, 256)):
            stages += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2),
            ]
        self.encoder = nn.Sequential(*stages)
        # Decoder: three 2x upsampling steps back to input resolution;
        # no activation after the last layer (logits).
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(256, 128, 2, stride=2),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.ConvTranspose2d(128, 64, 2, stride=2),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.ConvTranspose2d(64, num_classes, 2, stride=2),
        )

    def forward(self, x):
        return self.decoder(self.encoder(x))
# Training
def train_model(train_loader, val_loader, num_epochs=50, model=None):
    """Run a standard supervised training loop.

    Parameters
    ----------
    train_loader, val_loader : torch.utils.data.DataLoader
        Yield (images, labels) batches.
    num_epochs : int
        Number of passes over the training set.
    model : nn.Module, optional
        Network to train. Defaults to a fresh ``LandCoverCNN()`` so
        existing callers keep the previous behavior.

    Returns
    -------
    nn.Module
        The trained model, moved to the selected device.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    if model is None:
        model = LandCoverCNN()
    model = model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    for epoch in range(num_epochs):
        # --- training pass ---
        model.train()
        train_loss = 0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        # --- validation pass (no gradient tracking) ---
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                val_loss += loss.item()

        # NOTE: losses are summed over batches (not averaged), matching
        # the original reporting format.
        print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}')
    return model
```
### U-Net for Semantic Segmentation
```python
class UNet(nn.Module):
    """Classic U-Net for semantic segmentation.

    Four encoder stages with skip connections into four decoder stages;
    input height/width must be divisible by 16. Output is raw per-class
    logits at the input resolution.
    """

    def __init__(self, in_channels=4, num_classes=5):
        super().__init__()
        # Encoder
        self.enc1 = self.conv_block(in_channels, 64)
        self.enc2 = self.conv_block(64, 128)
        self.enc3 = self.conv_block(128, 256)
        self.enc4 = self.conv_block(256, 512)
        # Bottleneck
        self.bottleneck = self.conv_block(512, 1024)
        # Decoder: each up-conv halves the channels; concatenating the
        # matching encoder output doubles them again before the conv block.
        self.up1 = nn.ConvTranspose2d(1024, 512, 2, stride=2)
        self.dec1 = self.conv_block(1024, 512)
        self.up2 = nn.ConvTranspose2d(512, 256, 2, stride=2)
        self.dec2 = self.conv_block(512, 256)
        self.up3 = nn.ConvTranspose2d(256, 128, 2, stride=2)
        self.dec3 = self.conv_block(256, 128)
        self.up4 = nn.ConvTranspose2d(128, 64, 2, stride=2)
        self.dec4 = self.conv_block(128, 64)
        # Final 1x1 projection to class logits
        self.final = nn.Conv2d(64, num_classes, 1)

    def conv_block(self, in_ch, out_ch):
        """Two 3x3 conv -> BN -> ReLU layers."""
        return nn.Sequential(
            nn.Conv2d(in_ch, out_ch, 3, padding=1),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_ch, out_ch, 3, padding=1),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        # BUG FIX: this snippet never imports torch.nn.functional as F, so
        # the original `F.max_pool2d` calls raised NameError; call the same
        # function through the already-imported `nn` namespace.
        pool = nn.functional.max_pool2d
        # Encoder
        e1 = self.enc1(x)
        e2 = self.enc2(pool(e1, 2))
        e3 = self.enc3(pool(e2, 2))
        e4 = self.enc4(pool(e3, 2))
        # Bottleneck
        b = self.bottleneck(pool(e4, 2))
        # Decoder with skip connections
        d1 = self.dec1(torch.cat([self.up1(b), e4], dim=1))
        d2 = self.dec2(torch.cat([self.up2(d1), e3], dim=1))
        d3 = self.dec3(torch.cat([self.up3(d2), e2], dim=1))
        d4 = self.dec4(torch.cat([self.up4(d3), e1], dim=1))
        return self.final(d4)
```
### Change Detection with Siamese Network
```python
class SiameseNetwork(nn.Module):
    """Siamese network for bi-temporal change detection.

    Both input images pass through a shared (weight-tied) feature
    extractor; the two feature maps and their absolute difference are
    concatenated and classified per pixel as change / no change.
    """

    def __init__(self):
        super().__init__()
        # Shared branch: 128 output channels at 1/4 the input resolution
        # (two 2x2 max-pools).
        self.feature_extractor = nn.Sequential(
            nn.Conv2d(3, 32, 3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(64, 128, 3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
        )
        # BUG FIX: forward() concatenates f1, f2 and |f1 - f2|, i.e.
        # 128 * 3 = 384 channels, but the original first classifier conv
        # expected only 256 and crashed at runtime.
        self.classifier = nn.Sequential(
            nn.Conv2d(384, 128, 3, padding=1),
            nn.ReLU(),
            nn.Conv2d(128, 64, 3, padding=1),
            nn.ReLU(),
            nn.Conv2d(64, 2, 1),  # Binary: change / no change
        )

    def forward(self, x1, x2):
        f1 = self.feature_extractor(x1)
        f2 = self.feature_extractor(x2)
        # Concatenate both feature maps plus their absolute difference
        diff = torch.abs(f1 - f2)
        combined = torch.cat([f1, f2, diff], dim=1)
        return self.classifier(combined)
```
## Graph Neural Networks
### PyTorch Geometric for Spatial Data
```python
import torch
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv
# Create spatial graph
def create_spatial_graph(points_gdf, k_neighbors=5):
    """Create a graph from point data using k-NN.

    Parameters
    ----------
    points_gdf : geopandas.GeoDataFrame
        Point geometries; all non-geometry columns become node features.
    k_neighbors : int
        Neighbors per node. NOTE: sklearn's ``kneighbors`` on the fitted
        points returns each point as its own nearest neighbor, so the
        resulting graph contains self-loops.

    Returns
    -------
    torch_geometric.data.Data
        Graph with node features ``x`` and directed ``edge_index``.
    """
    # BUG FIX: numpy is used below but this snippet's import block never
    # brings it in; sklearn is only needed here, so import both locally.
    import numpy as np
    from sklearn.neighbors import NearestNeighbors

    coords = np.array([[p.x, p.y] for p in points_gdf.geometry])
    # Find k-nearest neighbors for every point
    nbrs = NearestNeighbors(n_neighbors=k_neighbors).fit(coords)
    distances, indices = nbrs.kneighbors(coords)
    # One directed edge (i -> j) for each neighbor j of node i
    edge_index = [[i, j] for i, neighbors in enumerate(indices) for j in neighbors]
    edge_index = torch.tensor(edge_index, dtype=torch.long).t().contiguous()
    # Node features: every column except the geometry
    features = points_gdf.drop('geometry', axis=1).values
    x = torch.tensor(features, dtype=torch.float)
    return Data(x=x, edge_index=edge_index)
# GCN for spatial prediction
class SpatialGCN(torch.nn.Module):
    """Three-layer graph convolutional network for node-level regression.

    Outputs one value per node; dropout is applied between the hidden
    layers during training only.
    """

    def __init__(self, num_features, hidden_channels=64):
        super().__init__()
        self.conv1 = GCNConv(num_features, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.conv3 = GCNConv(hidden_channels, 1)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index).relu()
        # BUG FIX: this snippet never imports torch.nn.functional as F, so
        # the original `F.dropout` raised NameError; call it via torch.nn.
        x = torch.nn.functional.dropout(x, p=0.5, training=self.training)
        x = self.conv2(x, edge_index).relu()
        x = self.conv3(x, edge_index)
        return x
```
## Explainable AI (XAI) for Geospatial
### SHAP for Model Interpretation
```python
import shap
import numpy as np
def explain_model(model, X, feature_names):
    """Explain model predictions with SHAP summary and dependence plots."""
    # Build the explainer and attribute every sample
    explainer = shap.Explainer(model, X)
    values = explainer(X)
    # Global overview of feature impact
    shap.summary_plot(values, X, feature_names=feature_names)
    # One dependence plot per feature
    for idx in range(X.shape[1]):
        shap.dependence_plot(idx, values, X, feature_names=feature_names)
    return values
# Spatial SHAP (accounting for spatial autocorrelation)
def spatial_shap(model, X, coordinates, radius=1000):
    """Spatially smoothed SHAP values.

    Each observation's attribution is replaced by the mean SHAP vector of
    all observations within ``radius`` of it (including itself), damping
    point-level noise caused by spatial autocorrelation.

    Parameters
    ----------
    model : estimator accepted by ``shap.Explainer``
    X : array-like of shape (n_samples, n_features)
    coordinates : array-like of shape (n_samples, 2)
        Projected x/y coordinates in the same units as ``radius``
        (the original hard-coded 1000, presumably metres).
    radius : float
        Neighborhood radius; default preserves the original behavior.

    Returns
    -------
    dict[int, np.ndarray]
        Sample index -> neighborhood-averaged SHAP vector.
    """
    # Compute per-sample SHAP values
    explainer = shap.Explainer(model, X)
    shap_values = explainer(X)

    # BUG FIX: the original called an undefined `find_neighbors` helper;
    # do the radius query inline with a vectorized distance computation.
    coords = np.asarray(coordinates, dtype=float)
    shap_spatial = {}
    for i, coord in enumerate(coords):
        neighbors = np.where(np.linalg.norm(coords - coord, axis=1) <= radius)[0]
        # Aggregate SHAP values over the neighborhood
        shap_spatial[i] = np.mean(shap_values.values[neighbors], axis=0)
    return shap_spatial
```
### Attention Maps for CNNs
```python
import cv2
import torch
import torch.nn.functional as F
def generate_attention_map(model, image_tensor, target_layer):
    """Generate attention map using Grad-CAM.

    NOTE(review): relies on `model.get_gradient(layer)` and
    `model.get_activation(layer)` accessors that are NOT part of standard
    torch.nn.Module — the model must itself register forward/backward
    hooks on `target_layer`; confirm against the model implementation.

    Returns a 2-D numpy array at the input's spatial resolution,
    normalized to [0, 1].
    """
    # Forward pass in inference mode
    model.eval()
    output = model(image_tensor)
    # Backward pass from the top predicted logit; this populates the
    # gradients that the hook-based accessor below reads.
    model.zero_grad()
    output[0, torch.argmax(output)].backward()
    # Gradients of the target layer's activations w.r.t. that logit
    gradients = model.get_gradient(target_layer)
    # Global average pooling over spatial dims -> per-channel weights
    weights = torch.mean(gradients, axis=(2, 3), keepdim=True)
    # Weighted combination of activation maps (Grad-CAM core step)
    activations = model.get_activation(target_layer)
    attention = torch.sum(weights * activations, axis=1, keepdim=True)
    # ReLU keeps only positively contributing regions, then upsample to
    # the input size and min-max normalize to [0, 1].
    attention = F.relu(attention)
    attention = F.interpolate(attention, size=image_tensor.shape[2:],
                              mode='bilinear', align_corners=False)
    attention = (attention - attention.min()) / (attention.max() - attention.min())
    return attention.squeeze().cpu().numpy()
```
For more ML examples, see [code-examples.md](code-examples.md).