refactor: 结构化skill

This commit is contained in:
Penglong
2026-01-28 23:58:37 +08:00
parent 6a9c725bed
commit 8ab30cdf00
8 changed files with 638 additions and 390 deletions

View File

@@ -0,0 +1,178 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
章节字数检查脚本
检查指定章节文件的字数低于3000字时提示需要扩充
"""
import os
import re
import sys
from pathlib import Path
# 修复 Windows 控制台编码问题
if sys.platform == 'win32':
import io
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='replace')
sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8', errors='replace')
def count_chinese_words(text: str) -> int:
"""统计中文字数排除标点符号和Markdown标记"""
# 移除Markdown标记
text = re.sub(r'#{1,6}\s*', '', text) # 标题
text = re.sub(r'\*\*(.*?)\*\*', r'\1', text) # 粗体
text = re.sub(r'\*(.*?)\*', r'\1', text) # 斜体
text = re.sub(r'~~(.*?)~~', r'\1', text) # 删除线
text = re.sub(r'`(.*?)`', r'\1', text) # 行内代码
text = re.sub(r'\[(.*?)\]\(.*?\)', r'\1', text) # 链接
# 统计中文字符(汉字)
chinese_chars = re.findall(r'[\u4e00-\u9fff]', text)
return len(chinese_chars)
def extract_content_from_chapter(file_path: Path) -> str:
"""从章节文件中提取正文内容(排除标题等元数据)"""
with open(file_path, 'r', encoding='utf-8') as f:
content = f.read()
# 查找正文开始位置(通常是第一个一级标题或二级标题之后)
lines = content.split('\n')
# 跳过开头的元数据(如 # 第XX章 标题)
content_start = 0
for i, line in enumerate(lines):
if line.startswith('#') and '' in line:
content_start = i + 1
break
# 提取正文
main_content = '\n'.join(lines[content_start:])
return main_content
def check_chapter(file_path: str, min_words: int = 3000) -> dict:
"""检查单个章节的字数"""
path = Path(file_path)
if not path.exists():
return {
'file': str(path),
'exists': False,
'word_count': 0,
'status': 'error',
'message': f'文件不存在: {file_path}'
}
main_content = extract_content_from_chapter(path)
word_count = count_chinese_words(main_content)
status = 'pass' if word_count >= min_words else 'fail'
message = f'字数: {word_count}' + (
f' (✓ 达标)' if word_count >= min_words else f' (✗ 不足,需要至少 {min_words} 字)'
)
return {
'file': str(path),
'exists': True,
'word_count': word_count,
'status': status,
'message': message
}
def check_all_chapters(directory: str, pattern: str = '第*.md', min_words: int = 3000) -> list:
"""检查目录下所有符合模式的章节文件"""
dir_path = Path(directory)
if not dir_path.exists():
print(f'错误: 目录不存在 - {directory}')
return []
chapter_files = sorted(dir_path.glob(pattern))
results = []
for chapter_file in chapter_files:
result = check_chapter(str(chapter_file), min_words)
results.append(result)
return results
def print_results(results: list, min_words: int = 3000):
"""打印检查结果"""
if not results:
print('没有找到章节文件')
return
total_words = 0
passed = 0
failed = 0
print('\n' + '=' * 60)
print('章节字数检查报告')
print('=' * 60)
for result in results:
if not result['exists']:
print(f'\n{result["file"]}')
print(f' {result["message"]}')
continue
total_words += result['word_count']
if result['status'] == 'pass':
passed += 1
icon = ''
else:
failed += 1
icon = '⚠️ '
print(f'\n{icon} {Path(result["file"]).name}')
print(f' {result["message"]}')
print('\n' + '-' * 60)
print(f'总计: {len(results)} 章 | {passed} 章达标 | {failed} 章不足 | 总字数: {total_words:,}')
print('-' * 60)
if failed > 0:
print(f'\n⚠️ 有 {failed} 章内容不足 {min_words} 字,建议使用扩充技巧:')
print(' - 添加细节描写(环境、心理、动作)')
print(' - 增加对话场景')
print(' - 扩展人物内心活动')
print(' - 补充背景故事')
print(f'\n 参考: references/content-expansion.md')
def main():
"""主函数"""
min_words = 3000
if len(sys.argv) < 2:
print('用法:')
print(' 检查单个章节: python check_chapter_wordcount.py <章节文件路径> [最小字数]')
print(' 检查所有章节: python check_chapter_wordcount.py --all <目录路径> [最小字数]')
print('')
print('示例:')
print(' python check_chapter_wordcount.py novels/故事/第01章.md')
print(' python check_chapter_wordcount.py novels/故事/第01章.md 3500')
print(' python check_chapter_wordcount.py --all novels/故事')
print(' python check_chapter_wordcount.py --all novels/故事 3500')
return
if sys.argv[1] == '--all':
if len(sys.argv) < 3:
print('错误: 使用 --all 时需要指定目录路径')
return
directory = sys.argv[2]
min_words = int(sys.argv[3]) if len(sys.argv) > 3 else 3000
results = check_all_chapters(directory, min_words=min_words)
print_results(results, min_words)
else:
file_path = sys.argv[1]
min_words = int(sys.argv[2]) if len(sys.argv) > 2 else 3000
result = check_chapter(file_path, min_words)
print_results([result], min_words)
if __name__ == '__main__':
main()