mirror of
https://github.com/K-Dense-AI/claude-scientific-skills.git
synced 2026-03-27 07:09:27 +08:00
Major improvements to the GeoMaster geospatial science skill: ### New Features - Added Rust geospatial support (GeoRust crates: geo, proj, shapefile, rstar) - Added comprehensive coordinate systems reference documentation - Added troubleshooting guide with common error fixes - Added cloud-native workflows (STAC, Planetary Computer, COG) - Added automatic skill activation configuration ### Reference Documentation - NEW: references/coordinate-systems.md - CRS fundamentals, UTM zones, EPSG codes - NEW: references/troubleshooting.md - Installation fixes, runtime errors, performance tips - UPDATED: references/programming-languages.md - Now covers 8 languages (added Rust) ### Main Skill File - Streamlined SKILL.md from 690 to 362 lines (500-line rule compliance) - Enhanced installation instructions with uv and conda - Added modern cloud-native workflow examples - Added performance optimization tips ### Documentation - NEW: GEOMASTER_IMPROVEMENTS.md - Complete changelog and testing guide - UPDATED: README.md - Highlight new capabilities ### Skill Activation - Created skill-rules.json with 150+ keywords and 50+ intent patterns - Supports file-based and content-based automatic activation The skill now covers 8 programming languages (Python, R, Julia, JavaScript, C++, Java, Go, Rust) with 500+ code examples across 70+ geospatial topics.
440 lines
11 KiB
Markdown
440 lines
11 KiB
Markdown
# GeoMaster Troubleshooting Guide
|
|
|
|
Solutions to common geospatial problems and debugging strategies.
|
|
|
|
## Installation Issues
|
|
|
|
### GDAL Installation Problems
|
|
|
|
```bash
|
|
# Problem: "gdal-config not found" or rasterio install fails
|
|
|
|
# Solution 1: Use conda (recommended)
|
|
conda install -c conda-forge gdal rasterio
|
|
|
|
# Solution 2: System packages (Ubuntu/Debian)
|
|
sudo apt-get install gdal-bin libgdal-dev
|
|
export CPLUS_INCLUDE_PATH=/usr/include/gdal
|
|
export C_INCLUDE_PATH=/usr/include/gdal
|
|
pip install rasterio
|
|
|
|
# Solution 3: Wheel files
|
|
pip install rasterio --find-links=https://gis.wheelwrights.com/
|
|
|
|
# Verify installation
|
|
python -c "from osgeo import gdal; print(gdal.__version__)"
|
|
python -c "import rasterio; print(rasterio.__version__)"
|
|
```
|
|
|
|
### Python Binding Issues
|
|
|
|
```bash
|
|
# Problem: "DLL load failed" on Windows
|
|
# Solution: Reinstall with conda
|
|
conda install -c conda-forge --force-reinstall gdal rasterio fiona
|
|
|
|
# Problem: "Symbol not found" on macOS
|
|
# Solution: Rebuild from source or use conda
|
|
brew install gdal
|
|
pip install rasterio --no-binary rasterio
|
|
|
|
# Problem: GEOS errors
|
|
brew install geos
|
|
pip install shapely --no-binary shapely
|
|
```
|
|
|
|
## Runtime Errors
|
|
|
|
### CRS Transformation Errors
|
|
|
|
```python
|
|
# Problem: "Invalid projection" or "CRS mismatch"
|
|
import geopandas as gpd
|
|
|
|
# Check CRS
|
|
print(f"CRS: {gdf.crs}")
|
|
|
|
# If None, set it
|
|
if gdf.crs is None:
|
|
gdf.set_crs("EPSG:4326", inplace=True)
|
|
|
|
# If unknown, try to detect
|
|
gdf = gdf.to_crs(gdf.estimate_utm_crs())
|
|
```
|
|
|
|
### Memory Errors with Large Rasters
|
|
|
|
```python
|
|
# Problem: "MemoryError" when reading large files
|
|
# Solution: Read in chunks or use windows
|
|
|
|
import rasterio
|
|
from rasterio.windows import Window
|
|
|
|
# Windowed reading
|
|
with rasterio.open('large.tif') as src:
|
|
window = Window(0, 0, 1000, 1000) # (col_off, row_off, width, height)
|
|
subset = src.read(1, window=window)
|
|
|
|
# Block-by-block processing
|
|
with rasterio.open('large.tif') as src:
|
|
for i, window in src.block_windows(1):
|
|
block = src.read(1, window=window)
|
|
# Process block...
|
|
|
|
# Use Dask for very large files
|
|
import dask.array as da
|
|
dask_array = da.from_rasterio('large.tif', chunks=(1, 1024, 1024))
|
|
```
|
|
|
|
### Geometry Validation Errors
|
|
|
|
```python
|
|
# Problem: "TopologyException" or "Self-intersection"
|
|
import geopandas as gpd
|
|
from shapely.validation import make_valid
|
|
|
|
# Check invalid geometries
|
|
invalid = gdf[~gdf.is_valid]
|
|
print(f"Invalid geometries: {len(invalid)}")
|
|
|
|
# Fix invalid geometries
|
|
gdf['geometry'] = gdf.geometry.make_valid()
|
|
|
|
# Buffer with 0 to fix (alternative method)
|
|
gdf['geometry'] = gdf.geometry.buffer(0)
|
|
```
|
|
|
|
### Coordinate Order Confusion
|
|
|
|
```python
|
|
# Problem: Points in wrong location (lon/lat swapped)
|
|
from pyproj import Transformer
|
|
|
|
# Common mistake: lon, lat vs lat, lon
|
|
# Always specify axis order
|
|
transformer = Transformer.from_crs(
|
|
"EPSG:4326",
|
|
"EPSG:32610",
|
|
always_xy=True # Input: x=lon, y=lat (not y=lat, x=lon)
|
|
)
|
|
|
|
# Correct usage
|
|
x, y = transformer.transform(lon, lat) # not lat, lon
|
|
```
|
|
|
|
## Performance Issues
|
|
|
|
### Slow Spatial Joins
|
|
|
|
```python
|
|
# Problem: sjoin takes too long
|
|
import geopandas as gpd
|
|
|
|
# Solution: Use spatial index
|
|
gdf1.sindex # Auto-created
|
|
gdf2.sindex
|
|
|
|
# For nearest neighbor joins, use specialized function
|
|
result = gpd.sjoin_nearest(gdf1, gdf2, max_distance=1000)
|
|
|
|
# Use intersection predicate (faster than 'intersects' for points)
|
|
result = gpd.sjoin(points, polygons, predicate='within')
|
|
|
|
# Clip to bounding box first
|
|
bbox = gdf1.total_bounds
|
|
gdf2_clipped = gdf2.cx[bbox[0]:bbox[2], bbox[1]:bbox[3]]
|
|
result = gpd.sjoin(gdf1, gdf2_clipped, predicate='intersects')
|
|
```
|
|
|
|
### Slow Raster I/O
|
|
|
|
```python
|
|
# Problem: Reading/writing rasters is slow
|
|
import rasterio
|
|
|
|
# Solution 1: Use compression when writing
|
|
profile.update(
|
|
compress='DEFLATE',
|
|
predictor=2,
|
|
zlevel=4
|
|
)
|
|
|
|
# Solution 2: Use tiled output
|
|
profile.update(
|
|
tiled=True,
|
|
blockxsize=256,
|
|
blockysize=256
|
|
)
|
|
|
|
# Solution 3: Enable caching
|
|
from osgeo import gdal
|
|
gdal.SetCacheMax(2**30) # 1GB
|
|
|
|
# Solution 4: Use COG format for cloud access
|
|
from rio_cogeo.cogeo import cog_translate
|
|
cog_translate('input.tif', 'output.tif', profile)
|
|
```
|
|
|
|
### Slow Reprojection
|
|
|
|
```python
|
|
# Problem: to_crs() is very slow
|
|
import geopandas as gpd
|
|
|
|
# Solution 1: Simplify geometry first
|
|
gdf_simple = gdf.geometry.simplify(tolerance=0.0001)
|
|
gdf_reproj = gdf_simple.to_crs(target_crs)
|
|
|
|
# Solution 2: Use lower precision for display
|
|
gdf_reproj = gdf.to_crs(target_crs, geometry_precision=2)
|
|
|
|
# Solution 3: Reproject in parallel
|
|
import multiprocessing as mp
|
|
from functools import partial
|
|
|
|
def reproj_chunk(chunk, target_crs):
|
|
return chunk.to_crs(target_crs)
|
|
|
|
chunks = np.array_split(gdf, mp.cpu_count())
|
|
with mp.Pool() as pool:
|
|
results = pool.map(partial(reproj_chunk, target_crs=target_crs), chunks)
|
|
gdf_reproj = gpd.GeoDataFrame(pd.concat(results))
|
|
```
|
|
|
|
## Common Pitfalls
|
|
|
|
### Area in Degrees
|
|
|
|
```python
|
|
# WRONG: Area in square degrees
|
|
gdf = gpd.read_file('data.geojson')
|
|
area = gdf.geometry.area # Wrong!
|
|
|
|
# CORRECT: Use projected CRS
|
|
gdf_proj = gdf.to_crs(gdf.estimate_utm_crs())
|
|
area_sqm = gdf_proj.geometry.area
|
|
area_sqkm = area_sqm / 1_000_000
|
|
```
|
|
|
|
### Buffer in Geographic CRS
|
|
|
|
```python
|
|
# WRONG: Buffer of 1000 degrees
|
|
gdf['buffer'] = gdf.geometry.buffer(1000)
|
|
|
|
# CORRECT: Project first
|
|
gdf_proj = gdf.to_crs("EPSG:32610")
|
|
gdf_proj['buffer_km'] = gdf_proj.geometry.buffer(1000) # 1000 meters
|
|
```
|
|
|
|
### Web Mercator Distortion
|
|
|
|
```python
|
|
# WRONG: Area calculation in Web Mercator
|
|
gdf = gdf.to_crs("EPSG:3857")
|
|
area = gdf.geometry.area # Significant distortion!
|
|
|
|
# CORRECT: Use appropriate projection
|
|
gdf = gdf.to_crs(gdf.estimate_utm_crs())
|
|
area = gdf.geometry.area # Accurate
|
|
```
|
|
|
|
### Mixing CRS
|
|
|
|
```python
|
|
# WRONG: Spatial join without checking CRS
|
|
result = gpd.sjoin(gdf1, gdf2, predicate='intersects')
|
|
|
|
# CORRECT: Ensure same CRS
|
|
if gdf1.crs != gdf2.crs:
|
|
gdf2 = gdf2.to_crs(gdf1.crs)
|
|
result = gpd.sjoin(gdf1, gdf2, predicate='intersects')
|
|
```
|
|
|
|
## Data Issues
|
|
|
|
### Missing/Missing CRS
|
|
|
|
```python
|
|
# Problem: CRS is None
|
|
gdf = gpd.read_file('data.geojson')
|
|
if gdf.crs is None:
|
|
# Try to detect from data extent
|
|
lon_min, lat_min, lon_max, lat_max = gdf.total_bounds
|
|
|
|
if -180 <= lon_min <= 180 and -90 <= lat_min <= 90:
|
|
gdf.set_crs("EPSG:4326", inplace=True)
|
|
print("Assumed WGS 84 (EPSG:4326)")
|
|
else:
|
|
gdf.set_crs(gdf.estimate_utm_crs(), inplace=True)
|
|
print("Estimated UTM zone")
|
|
```
|
|
|
|
### Invalid Coordinates
|
|
|
|
```python
|
|
# Problem: Coordinates out of valid range
|
|
gdf = gpd.read_file('data.geojson')
|
|
|
|
# Check for invalid coordinates
|
|
invalid_lon = (gdf.geometry.x < -180) | (gdf.geometry.x > 180)
|
|
invalid_lat = (gdf.geometry.y < -90) | (gdf.geometry.y > 90)
|
|
|
|
if invalid_lon.any() or invalid_lat.any():
|
|
print("Warning: Invalid coordinates found")
|
|
gdf = gdf[~invalid_lon & ~invalid_lat]
|
|
```
|
|
|
|
### Empty Geometries
|
|
|
|
```python
|
|
# Problem: Processing fails with empty geometries
|
|
# Remove empty geometries
|
|
gdf = gdf[~gdf.geometry.is_empty]
|
|
|
|
# Or fill with None
|
|
gdf.loc[gdf.geometry.is_empty, 'geometry'] = None
|
|
|
|
# Check before operations
|
|
if gdf.geometry.is_empty.any():
|
|
print(f"Warning: {gdf.geometry.is_empty.sum()} empty geometries")
|
|
```
|
|
|
|
## Remote Sensing Issues
|
|
|
|
### Sentinel-2 Band Ordering
|
|
|
|
```python
|
|
# Problem: Wrong band indices
|
|
# Sentinel-2 L2A SAFE structure:
|
|
# B01 (60m), B02 (10m), B03 (10m), B04 (10m), B05 (20m),
|
|
# B06 (20m), B07 (20m), B08 (10m), B08A (20m), B09 (60m),
|
|
# B11 (20m), B12 (20m)
|
|
|
|
# Sentinel-2 (resampled to 10m):
|
|
# B02=1, B03=2, B04=3, B05=4, B06=5, B07=6, B08=7, B8A=8, B11=9, B12=10
|
|
|
|
# For 10m bands only:
|
|
blue = src.read(1) # B02
|
|
green = src.read(2) # B03
|
|
red = src.read(3) # B04
|
|
nir = src.read(4) # B08
|
|
```
|
|
|
|
### Cloud Shadow Masking
|
|
|
|
```python
|
|
# Problem: Clouds and shadows not properly masked
|
|
def improved_cloud_mask(scl):
|
|
"""
|
|
Improved cloud masking using SCL layer.
|
|
Classes: 0=No data, 1=Saturated, 2=Dark, 3=Cloud shadow,
|
|
4=Vegetation, 5=Bare soil, 6=Water, 7=Cloud low prob,
|
|
8=Cloud med prob, 9=Cloud high prob, 10=Thin cirrus
|
|
"""
|
|
# Mask: clouds, cloud shadows, saturated
|
|
mask = scl.isin([0, 1, 3, 8, 9, 10])
|
|
return mask
|
|
|
|
# Apply
|
|
scl = s2_image.select('SCL')
|
|
cloud_mask = improved_cloud_mask(scl)
|
|
image_clean = s2_image.updateMask(cloud_mask.Not())
|
|
```
|
|
|
|
## Error Messages Reference
|
|
|
|
| Error | Cause | Solution |
|
|
|-------|-------|----------|
|
|
| `CRS mismatch` | Different coordinate systems | `gdf2 = gdf2.to_crs(gdf1.crs)` |
|
|
| `TopologyException` | Invalid/self-intersecting geometry | `gdf.geometry = gdf.geometry.make_valid()` |
|
|
| `MemoryError` | Large dataset | Use Dask or chunked reading |
|
|
| `Invalid projection` | Unknown CRS code | Check EPSG code, use `CRS.from_user_input()` |
|
|
| `Empty geometry` | Null geometries | `gdf = gdf[~gdf.geometry.is_empty]` |
|
|
| `Bounds error` | Coordinates out of range | Filter invalid coordinates |
|
|
| `DLL load failed` | GDAL not installed | Use conda: `conda install gdal` |
|
|
| `Symbol not found` | Library linking issue | Reinstall with binary wheels or conda |
|
|
| `Self-intersection` | Invalid polygon | Buffer(0) or make_valid() |
|
|
|
|
## Debugging Strategies
|
|
|
|
### 1. Check Data Integrity
|
|
|
|
```python
|
|
def check_geodataframe(gdf):
|
|
"""Comprehensive GeoDataFrame health check."""
|
|
print(f"Shape: {gdf.shape}")
|
|
print(f"CRS: {gdf.crs}")
|
|
print(f"Bounds: {gdf.total_bounds}")
|
|
print(f"Invalid geometries: {(~gdf.is_valid).sum()}")
|
|
print(f"Empty geometries: {gdf.geometry.is_empty.sum()}")
|
|
print(f"None geometries: {gdf.geometry.isna().sum()}")
|
|
print(f"Duplicate geometries: {gdf.geometry.duplicated().sum()}")
|
|
print("\nGeometry types:")
|
|
print(gdf.geometry.type.value_counts())
|
|
print("\nCoordinate range:")
|
|
print(f" X: {gdf.geometry.x.min():.2f} to {gdf.geometry.x.max():.2f}")
|
|
print(f" Y: {gdf.geometry.y.min():.2f} to {gdf.geometry.y.max():.2f}")
|
|
|
|
check_geodataframe(gdf)
|
|
```
|
|
|
|
### 2. Test Transformations
|
|
|
|
```python
|
|
def test_reprojection(gdf, target_crs):
|
|
"""Test if reprojection is reversible."""
|
|
original = gdf.copy()
|
|
gdf_proj = gdf.to_crs(target_crs)
|
|
gdf_back = gdf_proj.to_crs(gdf.crs)
|
|
|
|
diff = original.geometry.distance(gdf_back.geometry).max()
|
|
if diff > 1: # More than 1 meter
|
|
print(f"Warning: Max error: {diff:.2f}m")
|
|
return False
|
|
return True
|
|
```
|
|
|
|
### 3. Profile Code
|
|
|
|
```python
|
|
import time
|
|
|
|
def time_operation(func, *args, **kwargs):
|
|
"""Time a geospatial operation."""
|
|
start = time.time()
|
|
result = func(*args, **kwargs)
|
|
elapsed = time.time() - start
|
|
print(f"{func.__name__}: {elapsed:.2f}s")
|
|
return result
|
|
|
|
# Usage
|
|
time_operation(gdf.to_crs, "EPSG:32610")
|
|
```
|
|
|
|
## Getting Help
|
|
|
|
### Check Versions
|
|
|
|
```python
|
|
import sys
|
|
import geopandas as gpd
|
|
import rasterio
|
|
from osgeo import gdal
|
|
|
|
print(f"Python: {sys.version}")
|
|
print(f"GeoPandas: {gpd.__version__}")
|
|
print(f"Rasterio: {rasterio.__version__}")
|
|
print(f"GDAL: {gdal.__version__}")
|
|
print(f"PROJ: {gdal.VersionInfo('PROJ')}")
|
|
```
|
|
|
|
### Useful Resources
|
|
|
|
- **GeoPandas docs**: https://geopandas.org/
|
|
- **Rasterio docs**: https://rasterio.readthedocs.io/
|
|
- **PROJ datab**: https://epsg.org/
|
|
- **Stack Overflow**: Tag with `gis` and `python`
|
|
- **GIS Stack Exchange**: https://gis.stackexchange.com/
|