mirror of
https://github.com/PenglongHuang/chinese-novelist-skill.git
synced 2026-03-26 14:29:27 +08:00
179 lines
5.6 KiB
Python
179 lines
5.6 KiB
Python
#!/usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
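"""
Chapter word-count check script.

Checks the word count of the given chapter file(s) and reports any chapter
below 3000 characters as needing expansion.
"""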
|
||
|
||
import os
|
||
import re
|
||
import sys
|
||
from pathlib import Path
|
||
|
||
# Fix console encoding problems on Windows by forcing UTF-8 output with
# replacement of unencodable characters.
if sys.platform == 'win32':
    import io

    for _name in ('stdout', 'stderr'):
        _stream = getattr(sys, _name)
        if hasattr(_stream, 'reconfigure'):
            # Switch the existing text stream to UTF-8 in place.
            _stream.reconfigure(encoding='utf-8', errors='replace')
        elif hasattr(_stream, 'buffer'):
            # Replacement stream without reconfigure(): wrap its byte buffer.
            setattr(sys, _name, io.TextIOWrapper(
                _stream.buffer, encoding='utf-8', errors='replace'))
        # Otherwise (stream is None or exposes no byte buffer) leave it alone
        # instead of failing with AttributeError at import time.
|
||
|
||
|
||
def count_chinese_words(text: str) -> int:
    """Count the Chinese (Han) characters in *text*.

    Markdown markup is stripped first, then every Han ideograph is counted.
    Punctuation, Latin letters, digits and whitespace are not counted.

    Args:
        text: Chapter text, possibly containing Markdown.

    Returns:
        The number of Han characters found.
    """
    # (pattern, replacement) pairs, applied in this order.  Only the link
    # rule removes anything besides markup symbols: it drops the URL part,
    # which could itself contain Han characters.
    markdown_rules = (
        (r'#{1,6}\s*', ''),             # headings
        (r'\*\*(.*?)\*\*', r'\1'),      # bold
        (r'\*(.*?)\*', r'\1'),          # italic
        (r'~~(.*?)~~', r'\1'),          # strikethrough
        (r'`(.*?)`', r'\1'),            # inline code
        (r'\[(.*?)\]\(.*?\)', r'\1'),   # links: keep the label only
    )
    for pattern, replacement in markdown_rules:
        text = re.sub(pattern, replacement, text)

    # Besides the basic CJK Unified Ideographs block, also count the rarer
    # ideographs (used e.g. in personal names) that live in other blocks, so
    # they are not silently left out of the total.
    han_pattern = (
        r'[\u3400-\u4dbf'           # CJK Unified Ideographs Extension A
        r'\u4e00-\u9fff'            # CJK Unified Ideographs
        r'\uf900-\ufaff'            # CJK Compatibility Ideographs
        r'\U00020000-\U0003ffff]'   # supplementary/tertiary ideographic planes
    )
    return len(re.findall(han_pattern, text))
|
||
|
||
|
||
def extract_content_from_chapter(file_path: Path) -> str:
    """Return the body text of a chapter file, without its chapter heading.

    The file is read as UTF-8.  Everything up to and including the first
    line that starts with ``#`` and contains ``章`` (for example
    ``# 第01章 标题``) is dropped.  If no such line exists, the whole file
    content is returned.

    Args:
        file_path: Path of the chapter file.

    Returns:
        The remaining lines, joined with newlines.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # 'utf-8-sig' drops a leading byte-order mark if present.  With plain
    # 'utf-8' the first line would start with '\ufeff#', the heading would
    # not be recognised, and its characters would be counted as body text.
    with open(file_path, 'r', encoding='utf-8-sig') as f:
        lines = f.read().split('\n')

    # Index of the first chapter-heading line, or None when there is none.
    heading_index = next(
        (i for i, line in enumerate(lines)
         if line.startswith('#') and '章' in line),
        None,
    )
    content_start = 0 if heading_index is None else heading_index + 1
    return '\n'.join(lines[content_start:])
|
||
|
||
|
||
def check_chapter(file_path: str, min_words: int = 3000) -> dict:
    """Check whether a single chapter file reaches the minimum length.

    Args:
        file_path: Path to the chapter file.
        min_words: Minimum number of Chinese characters required.

    Returns:
        A dict with the keys ``file`` (path as a string), ``exists``,
        ``word_count``, ``status`` (``'pass'``, ``'fail'`` or ``'error'``)
        and ``message`` (human-readable summary).  A missing or unreadable
        file yields ``status == 'error'`` with ``word_count == 0``.
    """
    path = Path(file_path)

    if not path.exists():
        return {
            'file': str(path),
            'exists': False,
            'word_count': 0,
            'status': 'error',
            'message': f'文件不存在: {file_path}',
        }

    try:
        main_content = extract_content_from_chapter(path)
    except (OSError, UnicodeDecodeError) as exc:
        # The path exists but cannot be read as UTF-8 text (it is a
        # directory, access is denied, or the encoding is different).
        # Report it as an error result so that one bad file does not abort
        # a whole batch run with a traceback.
        return {
            'file': str(path),
            'exists': True,
            'word_count': 0,
            'status': 'error',
            'message': f'无法读取文件: {exc}',
        }

    word_count = count_chinese_words(main_content)
    reached = word_count >= min_words
    verdict = ' (✓ 达标)' if reached else f' (✗ 不足,需要至少 {min_words} 字)'

    return {
        'file': str(path),
        'exists': True,
        'word_count': word_count,
        'status': 'pass' if reached else 'fail',
        'message': f'字数: {word_count}' + verdict,
    }
|
||
|
||
|
||
def check_all_chapters(directory: str, pattern: str = '第*.md', min_words: int = 3000) -> list:
    """Check every chapter file in *directory* whose name matches *pattern*.

    Args:
        directory: Directory that contains the chapter files.
        pattern: Glob pattern selecting the chapter files.
        min_words: Minimum number of Chinese characters required per chapter.

    Returns:
        One ``check_chapter`` result per matching file, in chapter order.
        An empty list if the directory does not exist (an error line is
        printed in that case) or if nothing matches.
    """
    dir_path = Path(directory)
    # is_dir() rather than exists(): a regular file is not a usable directory.
    if not dir_path.is_dir():
        print(f'错误: 目录不存在 - {directory}')
        return []

    def natural_key(path: Path) -> tuple:
        # Split into alternating non-digit / digit runs (odd indices are the
        # digit runs) and compare digit runs as integers, so that 第2章
        # sorts before 第10章.  The plain string is a deterministic
        # tie-breaker for names such as 第1章 vs 第01章.
        text = str(path)
        parts = re.split(r'(\d+)', text)
        return (
            [int(part) if i % 2 else part for i, part in enumerate(parts)],
            text,
        )

    chapter_files = sorted(dir_path.glob(pattern), key=natural_key)
    return [check_chapter(str(chapter), min_words) for chapter in chapter_files]
|
||
|
||
|
||
def print_results(results: list, min_words: int = 3000) -> None:
    """Print a human-readable report for a list of chapter check results.

    Args:
        results: Result dicts with the keys ``file``, ``exists``,
            ``word_count``, ``status`` and ``message``.
        min_words: The threshold that was used; only shown in the hint that
            is printed when at least one chapter is too short.

    Entries that could not be checked (missing file or ``status`` equal to
    ``'error'``) are listed with their full path and are excluded from the
    passed/failed counts and from the total character count.  When there are
    such entries, their number is appended to the summary line so that the
    figures add up to the reported total.
    """
    if not results:
        print('没有找到章节文件')
        return

    total_words = 0
    passed = 0
    failed = 0
    errors = 0

    print('\n' + '=' * 60)
    print('章节字数检查报告')
    print('=' * 60)

    for result in results:
        if not result['exists'] or result['status'] == 'error':
            errors += 1
            print(f'\n❌ {result["file"]}')
            print(f' {result["message"]}')
            continue

        total_words += result['word_count']
        if result['status'] == 'pass':
            passed += 1
            icon = '✅'
        else:
            failed += 1
            icon = '⚠️ '

        print(f'\n{icon} {Path(result["file"]).name}')
        print(f' {result["message"]}')

    summary = (
        f'总计: {len(results)} 章 | {passed} 章达标 | {failed} 章不足 | '
        f'总字数: {total_words:,}'
    )
    if errors:
        # Only shown when needed, so the usual output stays unchanged.
        summary += f' | {errors} 个文件无法检查'

    print('\n' + '-' * 60)
    print(summary)
    print('-' * 60)

    if failed > 0:
        print(f'\n⚠️ 有 {failed} 章内容不足 {min_words} 字,建议使用扩充技巧:')
        print(' - 添加细节描写(环境、心理、动作)')
        print(' - 增加对话场景')
        print(' - 扩展人物内心活动')
        print(' - 补充背景故事')
        print('\n 参考: references/content-expansion.md')
|
||
|
||
|
||
def main():
    """Command-line entry point.

    Usage:
        check_chapter_wordcount.py <chapter file> [min words]
        check_chapter_wordcount.py --all <directory> [min words]

    Without arguments the usage text is printed.  The optional minimum
    defaults to 3000; a value that is not an integer is reported as an
    error instead of raising ``ValueError``.
    """
    default_min_words = 3000
    args = sys.argv[1:]

    if not args:
        print('用法:')
        print(' 检查单个章节: python check_chapter_wordcount.py <章节文件路径> [最小字数]')
        print(' 检查所有章节: python check_chapter_wordcount.py --all <目录路径> [最小字数]')
        print('')
        print('示例:')
        print(' python check_chapter_wordcount.py novels/故事/第01章.md')
        print(' python check_chapter_wordcount.py novels/故事/第01章.md 3500')
        print(' python check_chapter_wordcount.py --all novels/故事')
        print(' python check_chapter_wordcount.py --all novels/故事 3500')
        return

    check_all = args[0] == '--all'
    if check_all:
        if len(args) < 2:
            print('错误: 使用 --all 时需要指定目录路径')
            return
        target, raw_min = args[1], args[2:3]
    else:
        target, raw_min = args[0], args[1:2]

    # raw_min is a list holding zero or one element: the optional threshold.
    min_words = default_min_words
    if raw_min:
        try:
            min_words = int(raw_min[0])
        except ValueError:
            print(f'错误: 最小字数必须是整数: {raw_min[0]}')
            return

    if check_all:
        results = check_all_chapters(target, min_words=min_words)
    else:
        results = [check_chapter(target, min_words)]
    print_results(results, min_words)
|
||
|
||
|
||
# Entry point: run the CLI only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
|