diff --git a/projdump.sh b/projdump.sh
index ca67345..e78ba94 100755
--- a/projdump.sh
+++ b/projdump.sh
@@ -3,24 +3,47 @@
 
 set -euo pipefail
 
+# ── Settings (can be changed) ────────────────────────────────────────────────
+MAX_FILE_SIZE_KB=100     # Skip files larger than this size (KB)
+TRUNCATE_LINES=2000      # Truncate files longer than this many lines
+# ─────────────────────────────────────────────────────────────────────────────
+
 usage() {
     cat >&2 <<EOF
-Использование: $0 <директория>
+Использование: $0 [опции] <директория>
 
 Создаёт один файл:
-  -dump.txt — дерево проекта + содержимое файлов (без MD-разметки)
+  -dump.txt — дерево проекта + содержимое файлов
 
-Параметры:
-  -h, --help    Показать эту справку
+Опции:
+  -s, --max-size КБ    Макс. размер файла в КБ (по умолчанию: $MAX_FILE_SIZE_KB)
+  -l, --max-lines N    Обрезать после N строк (по умолчанию: $TRUNCATE_LINES)
+  -h, --help           Показать эту справку
 EOF
 }
 
-if [[ $# -eq 0 ]] || [[ "$1" == "-h" ]] || [[ "$1" == "--help" ]]; then
-    usage
-    exit 0
-fi
+# ── Argument parsing ─────────────────────────────────────────────────────────
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        -s|--max-size)  MAX_FILE_SIZE_KB="${2:?опция $1 требует значение}"; shift 2 ;;
+        -l|--max-lines) TRUNCATE_LINES="${2:?опция $1 требует значение}"; shift 2 ;;
+        -h|--help)      usage; exit 0 ;;
+        -*)             echo "Неизвестная опция: $1" >&2; usage; exit 1 ;;
+        *)              TARGET_DIR="$1"; shift ;;
+    esac
+done
 
-TARGET_DIR="$1"
+if [[ -z "${TARGET_DIR:-}" ]]; then
+    usage
+    exit 1
+fi
+
+# Both limits are used in arithmetic contexts below; accept only canonical
+# decimal integers (no sign, no leading zero that bash would read as octal).
+if [[ ! "$MAX_FILE_SIZE_KB" =~ ^(0|[1-9][0-9]*)$ || ! "$TRUNCATE_LINES" =~ ^(0|[1-9][0-9]*)$ ]]; then
+    echo "Ошибка: значения --max-size и --max-lines должны быть целыми неотрицательными числами." >&2
+    exit 1
+fi
 
 if [[ ! -d "$TARGET_DIR" ]]; then
     echo "Ошибка: директория '$TARGET_DIR' не существует." >&2
@@ -35,24 +58,67 @@ if ! command -v tree >/dev/null 2>&1; then
     exit 1
 fi
 
-# Список игнорирования (оставлен без изменений)
+# ── Ignored directories and files ────────────────────────────────────────────
 IGNORE_LIST=(
-    "third_party" "3rd_party" "3rdparty" ".git" ".svn" ".hg" "node_modules"
-    "__pycache__" "*.pyc" ".venv" "venv" "env" ".env" ".idea" ".vscode" ".vs"
-    ".DS_Store" "Thumbs.db" "dist" "build" "out" "target" ".pytest_cache"
-    ".mypy_cache" ".next" ".nuxt" ".output" ".svelte-kit" "coverage" ".cache"
-    ".parcel-cache" ".eslintcache" ".yarn" "yarn-error.log" "package-lock.json"
-    "yarn.lock" "pnpm-lock.yaml" "Cargo.lock" "go.sum" ".gradle" "gradle"
-    ".mvn" "mvnw" "mvnw.cmd" "*.swp" "*.swo" ".stack-work" "_build" "deps"
-    ".cargo" ".rustup" ".clangd" ".ccls-cache" "CMakeFiles" "CMakeCache.txt"
-    "CTestTestfile.cmake" "cmake_install.cmake" "*.dSYM" "*.ilk" "*.pdb"
-    "*.obj" "*.o" "*.so" "*.dylib" "*.dll" "*.exe" "*.bin" "*.hex" "*.elf"
-    "tags" "TAGS" "GPATH" "GTAGS" "GRTAGS" "GSYMS" ".github" ".gitlab"
-    ".circleci" ".travis.yml" "Jenkinsfile" "Dockerfile*" "docker-compose*.yml"
-    "*.log" "logs" "tmp" "temp" ".terraform" ".tfstate" ".tfstate.backup" "*.tfvars"
+    # VCS
+    ".git" ".svn" ".hg"
+    # Dependencies
+    "node_modules" "vendor" "third_party" "3rd_party" "3rdparty"
+    "bower_components" ".pnp" ".pnp.js"
+    # Python
+    "__pycache__" "*.pyc" "*.pyo" ".venv" "venv" "env" ".env"
+    ".mypy_cache" ".pytest_cache" ".ruff_cache" ".tox" "*.egg-info"
+    # IDE / editors
+    ".idea" ".vscode" ".vs" "*.swp" "*.swo" "*~"
+    # OS
+    ".DS_Store" "Thumbs.db" "desktop.ini"
+    # Build / output directories
+    "dist" "build" "out" "target" "_build" "release"
+    # JS / frontend
+    ".next" ".nuxt" ".output" ".svelte-kit" ".parcel-cache"
+    ".eslintcache" "coverage" ".cache" ".turbo"
+    # Lock files
+    "package-lock.json" "yarn.lock" "pnpm-lock.yaml" "bun.lockb"
+    "Cargo.lock" "go.sum" "Gemfile.lock" "poetry.lock"
+    "composer.lock" "Pipfile.lock" "flake.lock"
+    # Yarn
+    ".yarn" "yarn-error.log"
+    # Java / Kotlin / Gradle / Maven
+    ".gradle" "gradle" ".mvn" "mvnw" "mvnw.cmd"
+    # Rust / Go / Haskell
+    ".cargo" ".rustup" ".stack-work" "deps"
+    # C / C++
+    ".clangd" ".ccls-cache" "CMakeFiles" "CMakeCache.txt"
+    "CTestTestfile.cmake" "cmake_install.cmake"
+    # Compiled binaries
+    "*.dSYM" "*.ilk" "*.pdb" "*.obj" "*.o" "*.so" "*.dylib"
+    "*.dll" "*.exe" "*.bin" "*.hex" "*.elf" "*.a" "*.lib" "*.class"
+    # Tags
+    "tags" "TAGS" "GPATH" "GTAGS" "GRTAGS" "GSYMS"
+    # CI / CD / Docker
+    ".github" ".gitlab" ".circleci" ".travis.yml"
+    "Jenkinsfile" "Dockerfile*" "docker-compose*.yml"
+    # Logs / temporary files
+    "*.log" "logs" "tmp" "temp"
+    # Terraform
+    ".terraform" ".tfstate" ".tfstate.backup" "*.tfvars"
+    # Minified files
+    "*.min.js" "*.min.css" "*.min.map"
+    # Source maps
+    "*.map"
+    # Media / fonts / binary assets
+    "*.png" "*.jpg" "*.jpeg" "*.gif" "*.bmp" "*.ico" "*.icns"
+    "*.webp" "*.avif" "*.svg" "*.mp3" "*.mp4" "*.avi" "*.mov"
+    "*.mkv" "*.wav" "*.flac" "*.ogg" "*.webm"
+    "*.woff" "*.woff2" "*.ttf" "*.otf" "*.eot"
+    "*.pdf" "*.doc" "*.docx" "*.xls" "*.xlsx" "*.ppt" "*.pptx"
+    "*.zip" "*.tar" "*.gz" "*.bz2" "*.xz" "*.7z" "*.rar"
+    "*.db" "*.sqlite" "*.sqlite3"
+    "*.jar" "*.war" "*.ear"
+    "*.iso" "*.dmg" "*.img"
 )
 
-# Подготовка аргументов для find
+# ── Prepare arguments for find ───────────────────────────────────────────────
 FIND_IGNORE_ARGS=()
 for pat in "${IGNORE_LIST[@]}"; do
     if [[ "$pat" == */* ]]; then
@@ -64,7 +130,7 @@ done
 
 FIND_CMD=(find "$TARGET_DIR" \( -false "${FIND_IGNORE_ARGS[@]}" \) -prune -o -type f -print0)
 
-# Подготовка шаблонов для tree
+# ── Prepare ignore patterns for tree ─────────────────────────────────────────
 TREE_IGNORE_PATTERNS=""
 for pat in "${IGNORE_LIST[@]}"; do
     base_pat="${pat%%/*}"
@@ -75,29 +141,123 @@ for pat in "${IGNORE_LIST[@]}"; do
     fi
 done
 
+# ── Text-file check ──────────────────────────────────────────────────────────
+# Returns 0 when `file --mime-type` reports a type from the allow-list below
+# and 1 for every other type, including the "unknown" placeholder that is
+# substituted when the `file` command itself fails.
+# Arguments: $1 - path of the file to inspect
+is_text_file() {
+    local f="$1"
+    local mime
+    mime=$(file --brief --mime-type "$f" 2>/dev/null || echo "unknown")
+    case "$mime" in
+        text/*|application/json|application/xml|application/javascript|\
+        application/x-shellscript|application/x-httpd-php|application/toml|\
+        application/x-yaml|application/x-ruby|application/x-perl|\
+        application/x-python|inode/x-empty)
+            return 0 ;;
+        *)
+            return 1 ;;
+    esac
+}
+
+# ── Size formatting ──────────────────────────────────────────────────────────
+# Prints a byte count as "<n> B" (below 1024), "<n> KB" (below 1048576,
+# integer division by 1024) or "<x.y> MB" (one decimal place) otherwise.
+# Arguments: $1 - size in bytes
+human_size() {
+    local bytes=$1
+    if (( bytes < 1024 )); then
+        echo "${bytes} B"
+    elif (( bytes < 1048576 )); then
+        echo "$(( bytes / 1024 )) KB"
+    else
+        # Hand the value over with -v instead of splicing it into the program.
+        awk -v b="$bytes" 'BEGIN { printf "%.1f MB\n", b / 1048576 }'
+    fi
+}
+
+# ── Dump generation ──────────────────────────────────────────────────────────
 OUTPUT_FILE="${BASENAME}-dump.txt"
+MAX_FILE_SIZE_BYTES=$(( MAX_FILE_SIZE_KB * 1024 ))
+
+FILE_COUNT=0
+SKIPPED_BINARY=0
+SKIPPED_SIZE=0
+TRUNCATED=0
 
 {
     echo "==============================================================================="
     echo "PROJECT NAME: $BASENAME"
     echo "GENERATED ON: $(date)"
+    echo "MAX FILE SIZE: ${MAX_FILE_SIZE_KB} KB"
+    echo "TRUNCATE AFTER: ${TRUNCATE_LINES} lines"
     echo "==============================================================================="
-    echo -e "\n--- PROJECT STRUCTURE ---"
+    echo ""
+    echo "--- PROJECT STRUCTURE ---"
     tree -n -I "$TREE_IGNORE_PATTERNS" "$TARGET_DIR"
-    echo -e "\n==============================================================================="
+    echo ""
+    echo "==============================================================================="
     echo "SOURCE CODE CONTENTS"
     echo "==============================================================================="
 
     while IFS= read -r -d '' file; do
-        # Относительный путь для красоты
         rel_path="${file#$TARGET_DIR/}"
-
-        echo -e "\nFILE: $rel_path"
+        file_size=$(stat -c%s "$file" 2>/dev/null || stat -f%z "$file" 2>/dev/null || echo 0)
+
+        # Size check
+        if (( file_size > MAX_FILE_SIZE_BYTES )); then
+            echo ""
+            echo "FILE: $rel_path ($(human_size "$file_size"))"
+            echo "-------------------------------------------------------------------------------"
+            echo "[SKIPPED — file exceeds ${MAX_FILE_SIZE_KB} KB limit]"
+            echo "-------------------------------------------------------------------------------"
+            # Plain assignment: (( SKIPPED_SIZE++ )) returns status 1 while the
+            # counter is still 0, which aborts the script under `set -e`.
+            SKIPPED_SIZE=$(( SKIPPED_SIZE + 1 ))
+            continue
+        fi
+
+        # Text-file check
+        if ! is_text_file "$file"; then
+            SKIPPED_BINARY=$(( SKIPPED_BINARY + 1 ))
+            continue
+        fi
+
+        line_count=$(wc -l < "$file" || echo 0)
+        FILE_COUNT=$(( FILE_COUNT + 1 ))
+
+        echo ""
+        echo "FILE: $rel_path (${line_count} lines, $(human_size "$file_size"))"
         echo "-------------------------------------------------------------------------------"
-        cat "$file"
-        echo -e "\n-------------------------------------------------------------------------------"
-        echo "END OF FILE: $rel_path"
-    done < <("${FIND_CMD[@]}")
+
+        if (( line_count > TRUNCATE_LINES )); then
+            head -n "$TRUNCATE_LINES" "$file"
+            echo ""
+            echo "[TRUNCATED — showing first ${TRUNCATE_LINES} of ${line_count} lines]"
+            TRUNCATED=$(( TRUNCATED + 1 ))
+        else
+            cat "$file"
+        fi
+
+        echo ""
+        echo "-------------------------------------------------------------------------------"
+    done < <("${FIND_CMD[@]}" | sort -z)
+
+    echo ""
+    echo "==============================================================================="
+    echo "SUMMARY"
+    echo "==============================================================================="
+    echo "Files included: $FILE_COUNT"
+    echo "Skipped (binary): $SKIPPED_BINARY"
+    echo "Skipped (too large): $SKIPPED_SIZE"
+    echo "Truncated: $TRUNCATED"
+    echo "==============================================================================="
 } > "$OUTPUT_FILE"
 
-echo "Готово: $OUTPUT_FILE"
\ No newline at end of file
+echo "Готово: $OUTPUT_FILE"
+echo " Файлов включено: $FILE_COUNT"
+echo " Пропущено (бинарные): $SKIPPED_BINARY"
+echo " Пропущено (большие): $SKIPPED_SIZE"
+echo " Обрезано: $TRUNCATED"
+echo " Размер дампа: $(human_size "$(stat -c%s "$OUTPUT_FILE" 2>/dev/null || stat -f%z "$OUTPUT_FILE")")"