Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 18 additions & 6 deletions install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,18 @@
set -euo pipefail

# Installer defaults. Each TOCODE_* environment variable, when set and
# non-empty, overrides the fallback written after `:-`.
repo_url="${TOCODE_REPO_URL:-https://github.com/buzzer-re/ToCode.git}"
install_dir="${TOCODE_INSTALL_DIR:-$HOME/ToCode}"
branch="${TOCODE_BRANCH:-main}"
with_dev=false

# Absolute path of the directory that contains this script.
script_dir="$(cd "$(dirname "$0")" && pwd)"
# Choose the install directory. Every branch below assigns install_dir, so
# any value assigned to it earlier is replaced here. Precedence:
#   1. a non-empty TOCODE_INSTALL_DIR (the `:-` form keeps the test safe
#      when the variable is unset),
#   2. the script's own directory, when it contains both a .git directory
#      and a pyproject.toml file,
#   3. $HOME/ToCode.
if [ -n "${TOCODE_INSTALL_DIR:-}" ]; then
install_dir="$TOCODE_INSTALL_DIR"
elif [ -d "$script_dir/.git" ] && [ -f "$script_dir/pyproject.toml" ]; then
install_dir="$script_dir"
else
install_dir="$HOME/ToCode"
fi

usage() {
cat <<'EOF'
Install ToCode on Linux or macOS.
Expand All @@ -14,7 +22,7 @@ Usage:
./install.sh [options]

Options:
--dir PATH Clone or update ToCode at PATH. Default: $HOME/ToCode
--dir PATH Clone or update ToCode at PATH. Default: this checkout when run from one, otherwise $HOME/ToCode
--repo URL Git repository URL. Default: https://github.com/buzzer-re/ToCode.git
--branch NAME Branch to install. Default: main
--dev Also install development extras in the local checkout
Expand Down Expand Up @@ -124,10 +132,14 @@ done
command -v git >/dev/null 2>&1 || die "git is required but was not found on PATH"

if [ -d "$install_dir/.git" ]; then
info "Updating ToCode at $install_dir"
git -C "$install_dir" fetch origin "$branch"
git -C "$install_dir" checkout "$branch"
git -C "$install_dir" pull --ff-only origin "$branch"
if [ "$install_dir" = "$script_dir" ]; then
info "Installing ToCode from this checkout at $install_dir"
else
info "Updating ToCode at $install_dir"
git -C "$install_dir" fetch origin "$branch"
git -C "$install_dir" checkout "$branch"
git -C "$install_dir" pull --ff-only origin "$branch"
fi
elif [ -e "$install_dir" ]; then
die "$install_dir already exists and is not a Git checkout"
else
Expand Down
20 changes: 19 additions & 1 deletion src/tocode/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ def __init__(
self.progress = progress or Progress()
self.analysis: ProgramAnalysis | None = None
self.analysis_seconds: float | None = None
self.progress.log(f"Loading {self.binary}")

@property
def backend_name(self) -> str:
Expand Down Expand Up @@ -159,6 +158,21 @@ def prepare_parallel_workers(self) -> None:
if callable(prepare):
prepare()

# Forward to the session's `release_parallel_resources` hook.
# The hook is looked up by name, so a session that has no callable attribute
# with that name is simply left untouched.
def release_parallel_resources(self) -> None:
release = getattr(self.session, "release_parallel_resources", None)
if callable(release):
release()

# Forward to the session's `restore_parallel_resources` hook.
# The hook is looked up by name, so a session that has no callable attribute
# with that name is simply left untouched.
def restore_parallel_resources(self) -> None:
restore = getattr(self.session, "restore_parallel_resources", None)
if callable(restore):
restore()

# Forward to the session's `release_render_memory` hook.
# The hook is looked up by name, so a session that has no callable attribute
# with that name is simply left untouched.
def release_render_memory(self) -> None:
release = getattr(self.session, "release_render_memory", None)
if callable(release):
release()

def _binary_facts(
self, info: dict[str, Any], entries: list[dict[str, Any]]
) -> BinaryFacts:
Expand Down Expand Up @@ -398,6 +412,10 @@ def create_analyzer(
)
if progress is not None:
progress.log(f"Using {choice.selected.upper()} as backend.")
# Log before constructing the session: opening (and, for a fresh binary,
# loading) the IDA database happens inside the session constructor and can
# take a while, so the user should see activity instead of a silent gap.
progress.log(f"Loading {Path(binary).resolve()}")
if choice.selected == "ida":
return BinaryAnalyzer(
binary,
Expand Down
97 changes: 68 additions & 29 deletions src/tocode/backends/ida.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,13 +74,17 @@ def __init__(
self._ida_fixup = self._optional_import("ida_fixup")
self._ida_auto = self._optional_import("ida_auto")
self._ida_nalt = self._optional_import("ida_nalt")
self._db: Any = None

if db_path is None:
resolved_db, first_open = _database_path(self.binary)
needs_analysis = first_open
else:
resolved_db = db_path
needs_analysis = bool(needs_analysis)
self.analysis_command = (
"IDA Domain auto-analysis" if needs_analysis else "IDA database inventory"
)

self._cache_db = None if is_ida_database(self.binary) else resolved_db
if needs_analysis:
Expand Down Expand Up @@ -159,12 +163,15 @@ def analyze(self) -> None:
self._strings_ready = True

# Close the database handle held by this session, if there is one, and
# reset the session's handle state.
def close(self) -> None:
# No handle is held, so there is nothing to close.
if self._db is None:
return
try:
# The `save` flag mirrors whether the database was opened for analysis.
self._db.close(save=self._opened_for_analysis)
except Exception: # noqa: BLE001
# Any exception raised by close() is swallowed here.
pass
finally:
# NOTE(review): indentation is not visible in this view; presumably both
# resets below belong to `finally` — confirm against the real file.
self._opened_for_analysis = False
self._db = None

def database_path(self) -> Path | None:
if self._cache_db is None:
Expand All @@ -180,32 +187,80 @@ def prepare_parallel_workers(self) -> None:
return
self._save_and_reopen_database()

# Clear the session caches and close the open database handle without
# requesting a save.
# Raises BackendError when closing the database fails.
def release_parallel_resources(self) -> None:
# With a cache database path set, run prepare_parallel_workers() first.
# NOTE(review): presumably this persists pending changes before the
# unsaved close below — confirm against prepare_parallel_workers().
if self._cache_db is not None:
self.prepare_parallel_workers()
self._clear_caches()
# No open handle: nothing left to close.
if self._db is None:
return
try:
self._db.close(save=False)
except Exception as exc: # noqa: BLE001
raise BackendError("failed to close parent IDA database") from exc
# Reached only when close() did not raise: record that no database is open.
self._db = None
self._opened_for_analysis = False
self._decompiler_ready = False

# Reopen a database for this session when no handle is currently open.
def restore_parallel_resources(self) -> None:
# A handle is already open: nothing to restore.
if self._db is not None:
return
# Prefer the cache database path; fall back to the input file itself when
# is_ida_database() reports that it is an IDA database.
resolved_db = self._cache_db
if resolved_db is None and is_ida_database(self.binary):
resolved_db = self.binary
# Neither source yields a path, so there is nothing to reopen.
if resolved_db is None:
return
self._open_existing_database(resolved_db)

# Empty the disassembly, decompilation, summary and locals caches, then ask
# the Hex-Rays module to clear its cached functions when that is possible.
def release_render_memory(self) -> None:
self._disasm_cache.clear()
self._decompile_cache.clear()
self._summary_cache.clear()
self._locals_cache.clear()
# Without a Hex-Rays module object there is nothing further to clear.
if self._ida_hexrays is None:
return
# `clear_cached_cfuncs` is looked up by name and only called when it
# exists and is callable.
clear_cached = getattr(self._ida_hexrays, "clear_cached_cfuncs", None)
if callable(clear_cached):
try:
clear_cached()
except Exception: # noqa: BLE001
# Any exception raised by the clearing call is swallowed here.
pass

# Close the open database with a save request, then reopen the cache
# database through _open_existing_database().
# Does nothing when no cache database path is set.
# Raises BackendError when the saving close fails.
def _save_and_reopen_database(self) -> None:
if self._cache_db is None:
return
# No handle is open, so there is nothing to save: just open the cache database.
if self._db is None:
self._open_existing_database(self._cache_db)
return
try:
self._db.close(save=True)
except Exception as exc: # noqa: BLE001
raise BackendError(
f"failed to save IDA database at {self._cache_db}"
) from exc
# Reached only when the saving close did not raise.
self._opened_for_analysis = False
self._open_existing_database(self._cache_db)

def _open_existing_database(self, resolved_db: Path) -> None:
options = self._Options(auto_analysis=False, new_database=False)
try:
self._db = self._Database.open(
str(self._cache_db), args=options, save_on_close=False
str(resolved_db), args=options, save_on_close=False
)
except Exception as exc: # noqa: BLE001
raise BackendError(
f"failed to reopen IDA database at {self._cache_db}"
f"failed to reopen IDA database at {resolved_db}"
) from exc
self._opened_for_analysis = False
self._clear_caches()
self.ensure_decompiler()

def _clear_caches(self) -> None:
self._decompiler_ready = False
self._disasm_cache.clear()
self._decompile_cache.clear()
self._summary_cache.clear()
self._locals_cache.clear()
self._primed.clear()
self.ensure_decompiler()

def worker(self) -> "IdaSession":
if self._cache_db is not None and self._cache_db.exists():
Expand Down Expand Up @@ -413,19 +468,10 @@ def functions(self) -> list[dict[str, Any]]:
for func in self._db.functions:
name = self._db.functions.get_name(func) or f"sub_{func.start_ea:x}"
segment_name = self._segment_name(func.start_ea)
self._prime(func.start_ea)

flags = self._db.functions.get_flags(func)
is_library = bool(flags & FunctionFlags.LIB)
is_thunk = bool(flags & FunctionFlags.THUNK)
lvars = self._locals(func.start_ea)
args = sum(1 for item in lvars if bool(getattr(item, "is_argument", False)))
locals_count = sum(
1
for item in lvars
if not bool(getattr(item, "is_argument", False))
and not bool(getattr(item, "is_result", False))
)
rows.append(
{
"offset": int(func.start_ea),
Expand All @@ -435,8 +481,8 @@ def functions(self) -> list[dict[str, Any]]:
"calltype": None,
"noreturn": not bool(self._db.functions.does_return(func)),
"stackframe": int(getattr(func, "frsize", 0) or 0),
"nlocals": locals_count,
"nargs": args,
"nlocals": 0,
"nargs": 0,
"outdegree": 0,
"indegree": 0,
"is_library": is_library,
Expand All @@ -449,20 +495,14 @@ def functions(self) -> list[dict[str, Any]]:
return rows

def disasm(self, address: int) -> str:
if address not in self._disasm_cache:
func = self._need_function(address)
self._disasm_cache[address] = "\n".join(self._function_disassembly(func))
return self._disasm_cache[address]
func = self._need_function(address)
return "\n".join(self._function_disassembly(func))

def decompile(self, address: int) -> str:
if address not in self._decompile_cache:
self.ensure_decompiler()
func = self._need_function(address)
lines = self._function_pseudocode(func)
self._decompile_cache[address] = (
"\n".join(lines) if isinstance(lines, list) else str(lines)
)
return self._decompile_cache[address]
self.ensure_decompiler()
func = self._need_function(address)
lines = self._function_pseudocode(func)
return "\n".join(lines) if isinstance(lines, list) else str(lines)

def function_summary(self, address: int) -> str:
if address in self._summary_cache:
Expand Down Expand Up @@ -494,8 +534,7 @@ def function_summary(self, address: int) -> str:
]
if callee_names:
lines.append(f"callee_names: {', '.join(callee_names)}")
self._summary_cache[address] = "\n".join(lines)
return self._summary_cache[address]
return "\n".join(lines)

def calls_from(
self, address: int, imports, functions
Expand All @@ -518,7 +557,7 @@ def calls_from(
imported.add(name)
return sorted(edges), sorted(name for name in imported if name)

def _resolve_thunk(self, func):
def _resolve_thunk(self, func: Any) -> Any:
from ida_domain.functions import FunctionFlags

current = func
Expand Down
Loading
Loading