mirror of
https://github.com/K-Dense-AI/claude-scientific-skills.git
synced 2026-03-27 07:09:27 +08:00
Improve GeoMaster skill with Rust support, troubleshooting, and modern workflows
Major improvements to the GeoMaster geospatial science skill: ### New Features - Added Rust geospatial support (GeoRust crates: geo, proj, shapefile, rstar) - Added comprehensive coordinate systems reference documentation - Added troubleshooting guide with common error fixes - Added cloud-native workflows (STAC, Planetary Computer, COG) - Added automatic skill activation configuration ### Reference Documentation - NEW: references/coordinate-systems.md - CRS fundamentals, UTM zones, EPSG codes - NEW: references/troubleshooting.md - Installation fixes, runtime errors, performance tips - UPDATED: references/programming-languages.md - Now covers 8 languages (added Rust) ### Main Skill File - Streamlined SKILL.md from 690 to 362 lines (500-line rule compliance) - Enhanced installation instructions with uv and conda - Added modern cloud-native workflow examples - Added performance optimization tips ### Documentation - NEW: GEOMASTER_IMPROVEMENTS.md - Complete changelog and testing guide - UPDATED: README.md - Highlight new capabilities ### Skill Activation - Created skill-rules.json with 150+ keywords and 50+ intent patterns - Supports file-based and content-based automatic activation The skill now covers 8 programming languages (Python, R, Julia, JavaScript, C++, Java, Go, Rust) with 500+ code examples across 70+ geospatial topics.
This commit is contained in:
439
scientific-skills/geomaster/references/troubleshooting.md
Normal file
439
scientific-skills/geomaster/references/troubleshooting.md
Normal file
@@ -0,0 +1,439 @@
|
||||
# GeoMaster Troubleshooting Guide
|
||||
|
||||
Solutions to common geospatial problems and debugging strategies.
|
||||
|
||||
## Installation Issues
|
||||
|
||||
### GDAL Installation Problems
|
||||
|
||||
```bash
|
||||
# Problem: "gdal-config not found" or rasterio install fails
|
||||
|
||||
# Solution 1: Use conda (recommended)
|
||||
conda install -c conda-forge gdal rasterio
|
||||
|
||||
# Solution 2: System packages (Ubuntu/Debian)
|
||||
sudo apt-get install gdal-bin libgdal-dev
|
||||
export CPLUS_INCLUDE_PATH=/usr/include/gdal
|
||||
export C_INCLUDE_PATH=/usr/include/gdal
|
||||
pip install rasterio
|
||||
|
||||
# Solution 3: Wheel files
|
||||
pip install rasterio --find-links=https://gis.wheelwrights.com/
|
||||
|
||||
# Verify installation
|
||||
python -c "from osgeo import gdal; print(gdal.__version__)"
|
||||
python -c "import rasterio; print(rasterio.__version__)"
|
||||
```
|
||||
|
||||
### Python Binding Issues
|
||||
|
||||
```bash
|
||||
# Problem: "DLL load failed" on Windows
|
||||
# Solution: Reinstall with conda
|
||||
conda install -c conda-forge --force-reinstall gdal rasterio fiona
|
||||
|
||||
# Problem: "Symbol not found" on macOS
|
||||
# Solution: Rebuild from source or use conda
|
||||
brew install gdal
|
||||
pip install rasterio --no-binary rasterio
|
||||
|
||||
# Problem: GEOS errors
|
||||
brew install geos
|
||||
pip install shapely --no-binary shapely
|
||||
```
|
||||
|
||||
## Runtime Errors
|
||||
|
||||
### CRS Transformation Errors
|
||||
|
||||
```python
|
||||
# Problem: "Invalid projection" or "CRS mismatch"
|
||||
import geopandas as gpd
|
||||
|
||||
# Check CRS
|
||||
print(f"CRS: {gdf.crs}")
|
||||
|
||||
# If None, set it
|
||||
if gdf.crs is None:
|
||||
gdf.set_crs("EPSG:4326", inplace=True)
|
||||
|
||||
# If unknown, try to detect
|
||||
gdf = gdf.to_crs(gdf.estimate_utm_crs())
|
||||
```
|
||||
|
||||
### Memory Errors with Large Rasters
|
||||
|
||||
```python
|
||||
# Problem: "MemoryError" when reading large files
|
||||
# Solution: Read in chunks or use windows
|
||||
|
||||
import rasterio
|
||||
from rasterio.windows import Window
|
||||
|
||||
# Windowed reading
|
||||
with rasterio.open('large.tif') as src:
|
||||
window = Window(0, 0, 1000, 1000) # (col_off, row_off, width, height)
|
||||
subset = src.read(1, window=window)
|
||||
|
||||
# Block-by-block processing
|
||||
with rasterio.open('large.tif') as src:
|
||||
for i, window in src.block_windows(1):
|
||||
block = src.read(1, window=window)
|
||||
# Process block...
|
||||
|
||||
# Use Dask for very large files
|
||||
import dask.array as da
|
||||
dask_array = da.from_rasterio('large.tif', chunks=(1, 1024, 1024))
|
||||
```
|
||||
|
||||
### Geometry Validation Errors
|
||||
|
||||
```python
|
||||
# Problem: "TopologyException" or "Self-intersection"
|
||||
import geopandas as gpd
|
||||
from shapely.validation import make_valid
|
||||
|
||||
# Check invalid geometries
|
||||
invalid = gdf[~gdf.is_valid]
|
||||
print(f"Invalid geometries: {len(invalid)}")
|
||||
|
||||
# Fix invalid geometries
|
||||
gdf['geometry'] = gdf.geometry.make_valid()
|
||||
|
||||
# Buffer with 0 to fix (alternative method)
|
||||
gdf['geometry'] = gdf.geometry.buffer(0)
|
||||
```
|
||||
|
||||
### Coordinate Order Confusion
|
||||
|
||||
```python
|
||||
# Problem: Points in wrong location (lon/lat swapped)
|
||||
from pyproj import Transformer
|
||||
|
||||
# Common mistake: lon, lat vs lat, lon
|
||||
# Always specify axis order
|
||||
transformer = Transformer.from_crs(
|
||||
"EPSG:4326",
|
||||
"EPSG:32610",
|
||||
always_xy=True # Input: x=lon, y=lat (not y=lat, x=lon)
|
||||
)
|
||||
|
||||
# Correct usage
|
||||
x, y = transformer.transform(lon, lat) # not lat, lon
|
||||
```
|
||||
|
||||
## Performance Issues
|
||||
|
||||
### Slow Spatial Joins
|
||||
|
||||
```python
|
||||
# Problem: sjoin takes too long
|
||||
import geopandas as gpd
|
||||
|
||||
# Solution: Use spatial index
|
||||
gdf1.sindex # Auto-created
|
||||
gdf2.sindex
|
||||
|
||||
# For nearest neighbor joins, use specialized function
|
||||
result = gpd.sjoin_nearest(gdf1, gdf2, max_distance=1000)
|
||||
|
||||
# Use intersection predicate (faster than 'intersects' for points)
|
||||
result = gpd.sjoin(points, polygons, predicate='within')
|
||||
|
||||
# Clip to bounding box first
|
||||
bbox = gdf1.total_bounds
|
||||
gdf2_clipped = gdf2.cx[bbox[0]:bbox[2], bbox[1]:bbox[3]]
|
||||
result = gpd.sjoin(gdf1, gdf2_clipped, predicate='intersects')
|
||||
```
|
||||
|
||||
### Slow Raster I/O
|
||||
|
||||
```python
|
||||
# Problem: Reading/writing rasters is slow
|
||||
import rasterio
|
||||
|
||||
# Solution 1: Use compression when writing
|
||||
profile.update(
|
||||
compress='DEFLATE',
|
||||
predictor=2,
|
||||
zlevel=4
|
||||
)
|
||||
|
||||
# Solution 2: Use tiled output
|
||||
profile.update(
|
||||
tiled=True,
|
||||
blockxsize=256,
|
||||
blockysize=256
|
||||
)
|
||||
|
||||
# Solution 3: Enable caching
|
||||
from osgeo import gdal
|
||||
gdal.SetCacheMax(2**30) # 1GB
|
||||
|
||||
# Solution 4: Use COG format for cloud access
|
||||
from rio_cogeo.cogeo import cog_translate
|
||||
cog_translate('input.tif', 'output.tif', profile)
|
||||
```
|
||||
|
||||
### Slow Reprojection
|
||||
|
||||
```python
|
||||
# Problem: to_crs() is very slow
|
||||
import geopandas as gpd
|
||||
|
||||
# Solution 1: Simplify geometry first
|
||||
gdf_simple = gdf.geometry.simplify(tolerance=0.0001)
|
||||
gdf_reproj = gdf_simple.to_crs(target_crs)
|
||||
|
||||
# Solution 2: Use lower precision for display
|
||||
gdf_reproj = gdf.to_crs(target_crs, geometry_precision=2)
|
||||
|
||||
# Solution 3: Reproject in parallel
|
||||
import multiprocessing as mp
|
||||
from functools import partial
|
||||
|
||||
def reproj_chunk(chunk, target_crs):
|
||||
return chunk.to_crs(target_crs)
|
||||
|
||||
chunks = np.array_split(gdf, mp.cpu_count())
|
||||
with mp.Pool() as pool:
|
||||
results = pool.map(partial(reproj_chunk, target_crs=target_crs), chunks)
|
||||
gdf_reproj = gpd.GeoDataFrame(pd.concat(results))
|
||||
```
|
||||
|
||||
## Common Pitfalls
|
||||
|
||||
### Area in Degrees
|
||||
|
||||
```python
|
||||
# WRONG: Area in square degrees
|
||||
gdf = gpd.read_file('data.geojson')
|
||||
area = gdf.geometry.area # Wrong!
|
||||
|
||||
# CORRECT: Use projected CRS
|
||||
gdf_proj = gdf.to_crs(gdf.estimate_utm_crs())
|
||||
area_sqm = gdf_proj.geometry.area
|
||||
area_sqkm = area_sqm / 1_000_000
|
||||
```
|
||||
|
||||
### Buffer in Geographic CRS
|
||||
|
||||
```python
|
||||
# WRONG: Buffer of 1000 degrees
|
||||
gdf['buffer'] = gdf.geometry.buffer(1000)
|
||||
|
||||
# CORRECT: Project first
|
||||
gdf_proj = gdf.to_crs("EPSG:32610")
|
||||
gdf_proj['buffer_km'] = gdf_proj.geometry.buffer(1000) # 1000 meters
|
||||
```
|
||||
|
||||
### Web Mercator Distortion
|
||||
|
||||
```python
|
||||
# WRONG: Area calculation in Web Mercator
|
||||
gdf = gdf.to_crs("EPSG:3857")
|
||||
area = gdf.geometry.area # Significant distortion!
|
||||
|
||||
# CORRECT: Use appropriate projection
|
||||
gdf = gdf.to_crs(gdf.estimate_utm_crs())
|
||||
area = gdf.geometry.area # Accurate
|
||||
```
|
||||
|
||||
### Mixing CRS
|
||||
|
||||
```python
|
||||
# WRONG: Spatial join without checking CRS
|
||||
result = gpd.sjoin(gdf1, gdf2, predicate='intersects')
|
||||
|
||||
# CORRECT: Ensure same CRS
|
||||
if gdf1.crs != gdf2.crs:
|
||||
gdf2 = gdf2.to_crs(gdf1.crs)
|
||||
result = gpd.sjoin(gdf1, gdf2, predicate='intersects')
|
||||
```
|
||||
|
||||
## Data Issues
|
||||
|
||||
### Missing/Missing CRS
|
||||
|
||||
```python
|
||||
# Problem: CRS is None
|
||||
gdf = gpd.read_file('data.geojson')
|
||||
if gdf.crs is None:
|
||||
# Try to detect from data extent
|
||||
lon_min, lat_min, lon_max, lat_max = gdf.total_bounds
|
||||
|
||||
if -180 <= lon_min <= 180 and -90 <= lat_min <= 90:
|
||||
gdf.set_crs("EPSG:4326", inplace=True)
|
||||
print("Assumed WGS 84 (EPSG:4326)")
|
||||
else:
|
||||
gdf.set_crs(gdf.estimate_utm_crs(), inplace=True)
|
||||
print("Estimated UTM zone")
|
||||
```
|
||||
|
||||
### Invalid Coordinates
|
||||
|
||||
```python
|
||||
# Problem: Coordinates out of valid range
|
||||
gdf = gpd.read_file('data.geojson')
|
||||
|
||||
# Check for invalid coordinates
|
||||
invalid_lon = (gdf.geometry.x < -180) | (gdf.geometry.x > 180)
|
||||
invalid_lat = (gdf.geometry.y < -90) | (gdf.geometry.y > 90)
|
||||
|
||||
if invalid_lon.any() or invalid_lat.any():
|
||||
print("Warning: Invalid coordinates found")
|
||||
gdf = gdf[~invalid_lon & ~invalid_lat]
|
||||
```
|
||||
|
||||
### Empty Geometries
|
||||
|
||||
```python
|
||||
# Problem: Processing fails with empty geometries
|
||||
# Remove empty geometries
|
||||
gdf = gdf[~gdf.geometry.is_empty]
|
||||
|
||||
# Or fill with None
|
||||
gdf.loc[gdf.geometry.is_empty, 'geometry'] = None
|
||||
|
||||
# Check before operations
|
||||
if gdf.geometry.is_empty.any():
|
||||
print(f"Warning: {gdf.geometry.is_empty.sum()} empty geometries")
|
||||
```
|
||||
|
||||
## Remote Sensing Issues
|
||||
|
||||
### Sentinel-2 Band Ordering
|
||||
|
||||
```python
|
||||
# Problem: Wrong band indices
|
||||
# Sentinel-2 L2A SAFE structure:
|
||||
# B01 (60m), B02 (10m), B03 (10m), B04 (10m), B05 (20m),
|
||||
# B06 (20m), B07 (20m), B08 (10m), B08A (20m), B09 (60m),
|
||||
# B11 (20m), B12 (20m)
|
||||
|
||||
# Sentinel-2 (resampled to 10m):
|
||||
# B02=1, B03=2, B04=3, B05=4, B06=5, B07=6, B08=7, B8A=8, B11=9, B12=10
|
||||
|
||||
# For 10m bands only:
|
||||
blue = src.read(1) # B02
|
||||
green = src.read(2) # B03
|
||||
red = src.read(3) # B04
|
||||
nir = src.read(4) # B08
|
||||
```
|
||||
|
||||
### Cloud Shadow Masking
|
||||
|
||||
```python
|
||||
# Problem: Clouds and shadows not properly masked
|
||||
def improved_cloud_mask(scl):
|
||||
"""
|
||||
Improved cloud masking using SCL layer.
|
||||
Classes: 0=No data, 1=Saturated, 2=Dark, 3=Cloud shadow,
|
||||
4=Vegetation, 5=Bare soil, 6=Water, 7=Cloud low prob,
|
||||
8=Cloud med prob, 9=Cloud high prob, 10=Thin cirrus
|
||||
"""
|
||||
# Mask: clouds, cloud shadows, saturated
|
||||
mask = scl.isin([0, 1, 3, 8, 9, 10])
|
||||
return mask
|
||||
|
||||
# Apply
|
||||
scl = s2_image.select('SCL')
|
||||
cloud_mask = improved_cloud_mask(scl)
|
||||
image_clean = s2_image.updateMask(cloud_mask.Not())
|
||||
```
|
||||
|
||||
## Error Messages Reference
|
||||
|
||||
| Error | Cause | Solution |
|
||||
|-------|-------|----------|
|
||||
| `CRS mismatch` | Different coordinate systems | `gdf2 = gdf2.to_crs(gdf1.crs)` |
|
||||
| `TopologyException` | Invalid/self-intersecting geometry | `gdf.geometry = gdf.geometry.make_valid()` |
|
||||
| `MemoryError` | Large dataset | Use Dask or chunked reading |
|
||||
| `Invalid projection` | Unknown CRS code | Check EPSG code, use `CRS.from_user_input()` |
|
||||
| `Empty geometry` | Null geometries | `gdf = gdf[~gdf.geometry.is_empty]` |
|
||||
| `Bounds error` | Coordinates out of range | Filter invalid coordinates |
|
||||
| `DLL load failed` | GDAL not installed | Use conda: `conda install gdal` |
|
||||
| `Symbol not found` | Library linking issue | Reinstall with binary wheels or conda |
|
||||
| `Self-intersection` | Invalid polygon | Buffer(0) or make_valid() |
|
||||
|
||||
## Debugging Strategies
|
||||
|
||||
### 1. Check Data Integrity
|
||||
|
||||
```python
|
||||
def check_geodataframe(gdf):
|
||||
"""Comprehensive GeoDataFrame health check."""
|
||||
print(f"Shape: {gdf.shape}")
|
||||
print(f"CRS: {gdf.crs}")
|
||||
print(f"Bounds: {gdf.total_bounds}")
|
||||
print(f"Invalid geometries: {(~gdf.is_valid).sum()}")
|
||||
print(f"Empty geometries: {gdf.geometry.is_empty.sum()}")
|
||||
print(f"None geometries: {gdf.geometry.isna().sum()}")
|
||||
print(f"Duplicate geometries: {gdf.geometry.duplicated().sum()}")
|
||||
print("\nGeometry types:")
|
||||
print(gdf.geometry.type.value_counts())
|
||||
print("\nCoordinate range:")
|
||||
print(f" X: {gdf.geometry.x.min():.2f} to {gdf.geometry.x.max():.2f}")
|
||||
print(f" Y: {gdf.geometry.y.min():.2f} to {gdf.geometry.y.max():.2f}")
|
||||
|
||||
check_geodataframe(gdf)
|
||||
```
|
||||
|
||||
### 2. Test Transformations
|
||||
|
||||
```python
|
||||
def test_reprojection(gdf, target_crs):
|
||||
"""Test if reprojection is reversible."""
|
||||
original = gdf.copy()
|
||||
gdf_proj = gdf.to_crs(target_crs)
|
||||
gdf_back = gdf_proj.to_crs(gdf.crs)
|
||||
|
||||
diff = original.geometry.distance(gdf_back.geometry).max()
|
||||
if diff > 1: # More than 1 meter
|
||||
print(f"Warning: Max error: {diff:.2f}m")
|
||||
return False
|
||||
return True
|
||||
```
|
||||
|
||||
### 3. Profile Code
|
||||
|
||||
```python
|
||||
import time
|
||||
|
||||
def time_operation(func, *args, **kwargs):
|
||||
"""Time a geospatial operation."""
|
||||
start = time.time()
|
||||
result = func(*args, **kwargs)
|
||||
elapsed = time.time() - start
|
||||
print(f"{func.__name__}: {elapsed:.2f}s")
|
||||
return result
|
||||
|
||||
# Usage
|
||||
time_operation(gdf.to_crs, "EPSG:32610")
|
||||
```
|
||||
|
||||
## Getting Help
|
||||
|
||||
### Check Versions
|
||||
|
||||
```python
|
||||
import sys
|
||||
import geopandas as gpd
|
||||
import rasterio
|
||||
from osgeo import gdal
|
||||
|
||||
print(f"Python: {sys.version}")
|
||||
print(f"GeoPandas: {gpd.__version__}")
|
||||
print(f"Rasterio: {rasterio.__version__}")
|
||||
print(f"GDAL: {gdal.__version__}")
|
||||
print(f"PROJ: {gdal.VersionInfo('PROJ')}")
|
||||
```
|
||||
|
||||
### Useful Resources
|
||||
|
||||
- **GeoPandas docs**: https://geopandas.org/
|
||||
- **Rasterio docs**: https://rasterio.readthedocs.io/
|
||||
- **PROJ datab**: https://epsg.org/
|
||||
- **Stack Overflow**: Tag with `gis` and `python`
|
||||
- **GIS Stack Exchange**: https://gis.stackexchange.com/
|
||||
Reference in New Issue
Block a user