#!/usr/bin/env bash
# projdump.sh - dump a project's directory tree and source code into a single
# clean .txt file suitable for feeding to an AI assistant.

# Strict mode: abort on errors, on unset variables and on failures inside
# pipelines.
set -euo pipefail

# ── Settings (adjust as needed; overridable via -s / -l) ─────────────────────
MAX_FILE_SIZE_KB=100 # Skip files larger than this size (KB)
TRUNCATE_LINES=2000  # Truncate files longer than this many lines
# ─────────────────────────────────────────────────────────────────────────────

#######################################
# Print the help text.
# Globals:   MAX_FILE_SIZE_KB, TRUNCATE_LINES (read; their current values are
#            shown as the defaults)
# Arguments: none
# Outputs:   help text on stderr (nothing on stdout)
#######################################
usage() {
  cat >&2 <<EOF
Использование: $0 [опции] <директория>

Создаёт один файл:
<basename>-dump.txt — дерево проекта + содержимое файлов

Опции:
-s, --max-size КБ Макс. размер файла в КБ (по умолчанию: $MAX_FILE_SIZE_KB)
-l, --max-lines N Обрезать после N строк (по умолчанию: $TRUNCATE_LINES)
-h, --help Показать эту справку
EOF
}

# ── Argument parsing ─────────────────────────────────────────────────────────

#######################################
# Parse command-line arguments.
# Globals:   MAX_FILE_SIZE_KB, TRUNCATE_LINES, TARGET_DIR (written)
# Arguments: the script's command-line arguments
# Outputs:   error messages / help text on stderr
# Returns:   exits 0 after --help; exits 1 on an unknown option, on a missing
#            option value, or on a value that is not a non-negative integer
#######################################
parse_args() {
  local opt
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -s|--max-size|-l|--max-lines)
        opt="$1"
        # Without this check a trailing "-s" would hit an unbound "$2"
        # under `set -u` and die with a cryptic message.
        if [[ $# -lt 2 ]]; then
          echo "Ошибка: опция $opt требует аргумент." >&2
          usage
          exit 1
        fi
        # Both values are later used in arithmetic contexts, so only plain
        # decimal digits are accepted.
        if [[ ! "$2" =~ ^[0-9]+$ ]]; then
          echo "Ошибка: значение опции $opt должно быть целым неотрицательным числом: '$2'" >&2
          exit 1
        fi
        # 10# forces base 10 so that values such as "010" are not read as octal.
        case "$opt" in
          -s|--max-size) MAX_FILE_SIZE_KB=$(( 10#$2 )) ;;
          *) TRUNCATE_LINES=$(( 10#$2 )) ;;
        esac
        shift 2
        ;;
      -h|--help)
        usage
        exit 0
        ;;
      -*)
        echo "Неизвестная опция: $1" >&2
        usage
        exit 1
        ;;
      *)
        # Positional argument: the directory to dump (the last one wins).
        TARGET_DIR="$1"
        shift
        ;;
    esac
  done
}

parse_args "$@"

# A target directory is mandatory.
if [[ -z "${TARGET_DIR:-}" ]]; then
  usage
  exit 1
fi

if [[ ! -d "$TARGET_DIR" ]]; then
  echo "Ошибка: директория '$TARGET_DIR' не существует." >&2
  exit 1
fi

# Resolve to an absolute path. CDPATH is cleared for this one command:
# otherwise a relative name could be looked up via CDPATH first, and `cd`
# would also print the resolved directory to stdout, corrupting the capture.
TARGET_DIR="$(CDPATH='' cd -- "$TARGET_DIR" && pwd)"
BASENAME=$(basename -- "$TARGET_DIR")

# Required external tools: `tree` renders the project structure, `file`
# detects text files. Without `file` every file would be silently skipped
# as binary, so fail early instead.
for tool in tree file; do
  if ! command -v "$tool" >/dev/null 2>&1; then
    echo "Ошибка: требуется утилита '$tool'." >&2
    exit 1
  fi
done

# ── Ignored directories and files ────────────────────────────────────────────
# Each entry is a glob matched against a single path component (a file or
# directory name). An entry containing "/" is matched against the path instead.
IGNORE_LIST=(
  # VCS
  ".git" ".svn" ".hg"
  # Dependencies
  "node_modules" "vendor" "third_party" "3rd_party" "3rdparty"
  "bower_components" ".pnp" ".pnp.js"
  # Python
  "__pycache__" "*.pyc" "*.pyo" ".venv" "venv" "env" ".env"
  ".mypy_cache" ".pytest_cache" ".ruff_cache" ".tox" "*.egg-info"
  # IDEs / editors
  ".idea" ".vscode" ".vs" "*.swp" "*.swo" "*~"
  # OS
  ".DS_Store" "Thumbs.db" "desktop.ini"
  # Build / output directories
  "dist" "build" "out" "target" "_build" "release"
  # JS / frontend
  ".next" ".nuxt" ".output" ".svelte-kit" ".parcel-cache"
  ".eslintcache" "coverage" ".cache" ".turbo"
  # Lock files
  "package-lock.json" "yarn.lock" "pnpm-lock.yaml" "bun.lockb"
  "Cargo.lock" "go.sum" "Gemfile.lock" "poetry.lock"
  "composer.lock" "Pipfile.lock" "flake.lock"
  # Yarn
  ".yarn" "yarn-error.log"
  # Java / Kotlin / Gradle / Maven
  ".gradle" "gradle" ".mvn" "mvnw" "mvnw.cmd"
  # Rust / Go / Haskell
  ".cargo" ".rustup" ".stack-work" "deps"
  # C / C++
  ".clangd" ".ccls-cache" "CMakeFiles" "CMakeCache.txt"
  "CTestTestfile.cmake" "cmake_install.cmake"
  # Compiled binaries
  "*.dSYM" "*.ilk" "*.pdb" "*.obj" "*.o" "*.so" "*.dylib"
  "*.dll" "*.exe" "*.bin" "*.hex" "*.elf" "*.a" "*.lib" "*.class"
  # Tags
  "tags" "TAGS" "GPATH" "GTAGS" "GRTAGS" "GSYMS"
  # CI / CD / Docker
  ".github" ".gitlab" ".circleci" ".travis.yml"
  "Jenkinsfile" "Dockerfile*" "docker-compose*.yml"
  # Logs / temporary files
  "*.log" "logs" "tmp" "temp"
  # Terraform (state files are named e.g. terraform.tfstate, hence the globs)
  ".terraform" "*.tfstate" "*.tfstate.backup" "*.tfvars"
  # Minified files
  "*.min.js" "*.min.css" "*.min.map"
  # Source maps
  "*.map"
  # Media / fonts / binary assets
  "*.png" "*.jpg" "*.jpeg" "*.gif" "*.bmp" "*.ico" "*.icns"
  "*.webp" "*.avif" "*.svg" "*.mp3" "*.mp4" "*.avi" "*.mov"
  "*.mkv" "*.wav" "*.flac" "*.ogg" "*.webm"
  "*.woff" "*.woff2" "*.ttf" "*.otf" "*.eot"
  "*.pdf" "*.doc" "*.docx" "*.xls" "*.xlsx" "*.ppt" "*.pptx"
  "*.zip" "*.tar" "*.gz" "*.bz2" "*.xz" "*.7z" "*.rar"
  "*.db" "*.sqlite" "*.sqlite3"
  "*.jar" "*.war" "*.ear"
  "*.iso" "*.dmg" "*.img"
)

# ── Building the find command ────────────────────────────────────────────────

#######################################
# Build the find invocation that lists every regular file under TARGET_DIR
# (NUL-delimited), pruning anything that matches IGNORE_LIST.
# Globals:   IGNORE_LIST, TARGET_DIR (read)
#            FIND_IGNORE_ARGS, FIND_CMD (written, arrays)
# Arguments: none
#######################################
build_find_cmd() {
  local pat
  FIND_IGNORE_ARGS=()
  for pat in "${IGNORE_LIST[@]}"; do
    if [[ "$pat" == */* ]]; then
      # Patterns containing a slash are matched against the path.
      FIND_IGNORE_ARGS+=(-o -path "*/$pat")
    else
      FIND_IGNORE_ARGS+=(-o -name "$pat")
    fi
  done

  # The leading -false lets every pattern be appended uniformly as "-o ...".
  # -mindepth 1 keeps the tests away from TARGET_DIR itself: without it a
  # project directory named like an ignore entry (e.g. "build" or "target")
  # would be pruned as a whole and the dump would contain no files.
  FIND_CMD=(
    find "$TARGET_DIR" -mindepth 1
    \( -false "${FIND_IGNORE_ARGS[@]}" \) -prune
    -o -type f -print0
  )
}

build_find_cmd

# ── Building the ignore pattern for tree ─────────────────────────────────────

#######################################
# Join the IGNORE_LIST entries into the single "a|b|c" pattern that is passed
# to `tree -I`. Only the part of each entry before its first "/" is used;
# entries whose first component is empty are skipped.
# Globals:   IGNORE_LIST (read), TREE_IGNORE_PATTERNS (written)
# Arguments: none
#######################################
build_tree_ignore_patterns() {
  local pat base_pat
  local -a bases=()
  for pat in "${IGNORE_LIST[@]}"; do
    base_pat="${pat%%/*}"
    if [[ -n "$base_pat" ]]; then
      bases+=("$base_pat")
    fi
  done

  # "${bases[*]}" joins the elements with the first character of IFS; the
  # IFS change is local to this function.
  local IFS='|'
  TREE_IGNORE_PATTERNS="${bases[*]}"
}

build_tree_ignore_patterns

# ── Text file detection ──────────────────────────────────────────────────────

#######################################
# Decide whether a file should be treated as text, based on the MIME type
# reported by file(1).
# Arguments: $1 - path of the file to inspect
# Returns:   0 if the MIME type is on the text allow-list, 1 otherwise
#            (including when `file` fails or is unavailable)
#######################################
is_text_file() {
  local f="$1"
  local mime
  # If `file` fails, treat the type as unknown, i.e. not text.
  mime=$(file --brief --mime-type -- "$f" 2>/dev/null) || mime="unknown"

  case "$mime" in
    text/* \
      | application/json | application/xml | application/javascript \
      | application/x-shellscript | application/x-httpd-php | application/toml \
      | application/x-yaml | application/x-ruby | application/x-perl \
      | application/x-python \
      | application/*+json | application/*+xml \
      | inode/x-empty | application/x-empty)
      # application/*+json and application/*+xml are structured-syntax
      # suffix types (JSON / XML payloads). application/x-empty is accepted
      # next to inode/x-empty so that empty files count as text whichever
      # name `file` uses for them.
      return 0
      ;;
    *)
      return 1
      ;;
  esac
}

# ── Size formatting ──────────────────────────────────────────────────────────

#######################################
# Format a byte count for display.
# Arguments: $1 - size in bytes (non-negative integer)
# Outputs:   "<n> B" below 1024 bytes, "<n> KB" (integer division) below
#            1 MiB, otherwise "<x.y> MB" with one decimal place
#######################################
human_size() {
  local bytes=$1
  if (( bytes < 1024 )); then
    printf '%s B\n' "$bytes"
  elif (( bytes < 1048576 )); then
    printf '%s KB\n' "$(( bytes / 1024 ))"
  else
    # The value is handed to awk with -v instead of being spliced into the
    # program text; LC_ALL=C keeps "." as the decimal separator.
    LC_ALL=C awk -v b="$bytes" 'BEGIN { printf "%.1f MB\n", b / 1048576 }'
  fi
}

# ── Dump generation ──────────────────────────────────────────────────────────
# The dump is written to the current working directory.
OUTPUT_FILE="${BASENAME}-dump.txt"
MAX_FILE_SIZE_BYTES=$(( MAX_FILE_SIZE_KB * 1024 ))

# Counters reported in the summary.
FILE_COUNT=0
SKIPPED_BINARY=0
SKIPPED_SIZE=0
TRUNCATED=0

# Everything printed inside this group goes to OUTPUT_FILE. It is a brace
# group (not a subshell) and the loop reads from a process substitution, so
# the counters updated inside are still visible after the group.
{
  echo "==============================================================================="
  echo "PROJECT NAME: $BASENAME"
  echo "GENERATED ON: $(date)"
  echo "MAX FILE SIZE: ${MAX_FILE_SIZE_KB} KB"
  echo "TRUNCATE AFTER: ${TRUNCATE_LINES} lines"
  echo "==============================================================================="
  echo ""
  echo "--- PROJECT STRUCTURE ---"
  tree -n -I "$TREE_IGNORE_PATTERNS" "$TARGET_DIR"
  echo ""
  echo "==============================================================================="
  echo "SOURCE CODE CONTENTS"
  echo "==============================================================================="

  while IFS= read -r -d '' file; do
    # When the script is run from inside the target directory, the dump being
    # written is itself found by find; never copy it into itself.
    if [[ "$file" -ef "$OUTPUT_FILE" ]]; then
      continue
    fi

    # The inner quotes make TARGET_DIR a literal prefix rather than a glob.
    rel_path="${file#"$TARGET_DIR"/}"
    # GNU stat first, BSD stat as a fallback, 0 if both fail.
    file_size=$(stat -c%s "$file" 2>/dev/null || stat -f%z "$file" 2>/dev/null || echo 0)

    # Size check: oversized files are listed but their content is omitted.
    if (( file_size > MAX_FILE_SIZE_BYTES )); then
      echo ""
      echo "FILE: $rel_path ($(human_size "$file_size"))"
      echo "-------------------------------------------------------------------------------"
      echo "[SKIPPED — file exceeds ${MAX_FILE_SIZE_KB} KB limit]"
      echo "-------------------------------------------------------------------------------"
      # Not (( SKIPPED_SIZE++ )): that returns status 1 when the old value
      # is 0, which aborts the script under `set -e`.
      SKIPPED_SIZE=$(( SKIPPED_SIZE + 1 ))
      continue
    fi

    # Text check: non-text files are counted and skipped silently.
    if ! is_text_file "$file"; then
      SKIPPED_BINARY=$(( SKIPPED_BINARY + 1 ))
      continue
    fi

    line_count=$(wc -l < "$file" || echo 0)
    # Some wc implementations pad the number with spaces; normalise it.
    line_count=$(( line_count ))
    FILE_COUNT=$(( FILE_COUNT + 1 ))

    echo ""
    echo "FILE: $rel_path (${line_count} lines, $(human_size "$file_size"))"
    echo "-------------------------------------------------------------------------------"

    if (( line_count > TRUNCATE_LINES )); then
      head -n "$TRUNCATE_LINES" "$file"
      echo ""
      echo "[TRUNCATED — showing first ${TRUNCATE_LINES} of ${line_count} lines]"
      TRUNCATED=$(( TRUNCATED + 1 ))
    else
      cat "$file"
    fi

    echo ""
    echo "-------------------------------------------------------------------------------"
  done < <("${FIND_CMD[@]}" | sort -z)

  echo ""
  echo "==============================================================================="
  echo "SUMMARY"
  echo "==============================================================================="
  echo "Files included: $FILE_COUNT"
  echo "Skipped (binary): $SKIPPED_BINARY"
  echo "Skipped (too large): $SKIPPED_SIZE"
  echo "Truncated: $TRUNCATED"
  echo "==============================================================================="
} > "$OUTPUT_FILE"

# Final report on stdout. The size lookup uses the same GNU stat / BSD stat /
# 0 fallback chain as the per-file lookup, so human_size always receives a
# number.
dump_size=$(stat -c%s "$OUTPUT_FILE" 2>/dev/null || stat -f%z "$OUTPUT_FILE" 2>/dev/null || echo 0)

echo "Готово: $OUTPUT_FILE"
echo " Файлов включено: $FILE_COUNT"
echo " Пропущено (бинарные): $SKIPPED_BINARY"
echo " Пропущено (большие): $SKIPPED_SIZE"
echo " Обрезано: $TRUNCATED"
echo " Размер дампа: $(human_size "$dump_size")"