diff --git a/Makefile b/Makefile index 77e47dc..cbd6fec 100644 --- a/Makefile +++ b/Makefile @@ -1,40 +1,21 @@ # Apprentice binary -CC = gcc -CFLAGS = -std=c99 -g -Wall -Wextra -Werror -LDFLAGS = -lm -lmagic -HEADER_FILES = src -C_SOURCE_FILES = src/apprentice.c -OBJECT_FILES = $(C_SOURCE_FILES:.c=.o) -EXECUTABLE_DIRECTORY = priv -EXECUTABLE = $(EXECUTABLE_DIRECTORY)/apprentice +ERL_EI_INCLUDE:=$(shell erl -eval 'io:format("~s", [code:lib_dir(erl_interface, include)])' -s init stop -noshell | head -1) +ERL_EI_LIB:=$(shell erl -eval 'io:format("~s", [code:lib_dir(erl_interface, lib)])' -s init stop -noshell | head -1) +CFLAGS = -std=c99 -g -Wall -Werror +CPPFLAGS = -I$(ERL_EI_INCLUDE) +LDFLAGS = -L$(ERL_EI_LIB) +LDLIBS = -lpthread -lei -lm -lmagic +PRIV = priv/ +RM = rm -Rf -# Unit test custom magic file +all: priv/apprentice -MAGIC = file -TEST_DIRECTORY = test -TARGET_MAGIC = $(TEST_DIRECTORY)/elixir.mgc -SOURCE_MAGIC = $(TEST_DIRECTORY)/elixir - -# Target - -all: $(EXECUTABLE) $(TARGET_MAGIC) - -# Compile - -$(EXECUTABLE): $(OBJECT_FILES) $(EXECUTABLE_DIRECTORY) - $(CC) $(OBJECT_FILES) -o $@ $(LDFLAGS) - -$(EXECUTABLE_DIRECTORY): - mkdir -p $(EXECUTABLE_DIRECTORY) - -.o: - $(CC) $(CFLAGS) $< -o $@ - -# Test case - -$(TARGET_MAGIC): $(SOURCE_MAGIC) - cd $(TEST_DIRECTORY); $(MAGIC) -C -m elixir +priv/apprentice: src/apprentice.c + mkdir -p priv + $(CC) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $^ $(LDLIBS) -o $@ clean: - rm -f $(EXECUTABLE) $(OBJECT_FILES) $(BEAM_FILES) + $(RM) $(PRIV) + +.PHONY: clean diff --git a/lib/gen_magic/config.ex b/lib/gen_magic/config.ex index c971e7c..70ae355 100644 --- a/lib/gen_magic/config.ex +++ b/lib/gen_magic/config.ex @@ -5,19 +5,13 @@ defmodule GenMagic.Config do @startup_timeout 1_000 @process_timeout 30_000 @recycle_threshold :infinity - @database_patterns [:default] def get_port_name do {:spawn_executable, to_charlist(get_executable_name())} end - def get_port_options(options) do - arguments = [:use_stdio, :stderr_to_stdout, :binary, :exit_status] - - case get_executable_arguments(options) do - [] -> arguments - list -> [{:args, list} | arguments] - end + def get_port_options(_options) do + [:use_stdio, :binary, :exit_status, {:packet, 2}] end def get_startup_timeout(options) do @@ -36,13 +30,6 @@ defmodule GenMagic.Config do Path.join(:code.priv_dir(@otp_app), @executable_name) end - defp get_executable_arguments(options) do - Enum.flat_map(List.wrap(get(options, :database_patterns, @database_patterns)), fn - :default -> ["--database-default"] - pattern -> pattern |> Path.wildcard() |> Enum.flat_map(&["--database-file", &1]) - end) - end - defp get(options, key, default) do Keyword.get(options, key, default) end diff --git a/lib/gen_magic/helpers.ex b/lib/gen_magic/helpers.ex index 183fe31..13ab3de 100644 --- a/lib/gen_magic/helpers.ex +++ b/lib/gen_magic/helpers.ex @@ -5,7 +5,9 @@ defmodule GenMagic.Helpers do alias GenMagic.Result alias GenMagic.Server - @spec perform_once(Path.t(), [Server.option()]) :: {:ok, Result.t()} | {:error, term()} + + @spec perform_once(Path.t() | {:bytes, binary}, [Server.option()]) :: + {:ok, Result.t()} | {:error, term()} @doc """ Runs a one-shot process without supervision. diff --git a/lib/gen_magic/server.ex b/lib/gen_magic/server.ex index fc76885..c15ea18 100644 --- a/lib/gen_magic/server.ex +++ b/lib/gen_magic/server.ex @@ -9,6 +9,8 @@ defmodule GenMagic.Server do alias GenMagic.Result alias GenMagic.Server.Data alias GenMagic.Server.Status + import Kernel, except: [send: 2] + require Logger @typedoc """ Represents the reference to the underlying server, as returned by `:gen_statem`. @@ -47,6 +49,7 @@ defmodule GenMagic.Server do [:default, "path/to/my/magic"] """ + @database_patterns [:default] @type option :: {:name, atom() | :gen_statem.server_name()} | {:startup_timeout, timeout()} @@ -72,7 +75,7 @@ defmodule GenMagic.Server do - `:recycling`: This is the state the Server will be in, if its underlying C program needs to be recycled. This state is triggered whenever the cycle count reaches the defined value as per `:recycle_threshold`. - + In this state, the Server is able to accept requests, but they will not be processed until the underlying C server program has been started again. """ @@ -80,7 +83,8 @@ defmodule GenMagic.Server do @spec child_spec([option()]) :: Supervisor.child_spec() @spec start_link([option()]) :: :gen_statem.start_ret() - @spec perform(t(), Path.t(), timeout()) :: {:ok, Result.t()} | {:error, term()} + @spec perform(t(), Path.t() | {:bytes, binary()}, timeout()) :: + {:ok, Result.t()} | {:error, term() | String.t()} @spec status(t(), timeout()) :: {:ok, Status.t()} | {:error, term()} @spec stop(t(), term(), timeout()) :: :ok @@ -126,6 +130,20 @@ defmodule GenMagic.Server do end end + @doc """ + Reloads a Server with a new set of databases. + """ + def reload(server_ref, database_patterns \\ nil, timeout \\ 5000) do + :gen_statem.call(server_ref, {:reload, database_patterns}, timeout) + end + + @doc """ + Same as `reload/2,3` but with a full restart of the underlying C port. + """ + def recycle(server_ref, database_patterns \\ nil, timeout \\ 5000) do + :gen_statem.call(server_ref, {:recycle, database_patterns}, timeout) + end + @doc """ Returns status of the Server. """ @@ -153,6 +171,7 @@ defmodule GenMagic.Server do data = %Data{ port_name: get_port_name(), + database_patterns: Keyword.get(options, :database_patterns, []), port_options: get_port_options(options), startup_timeout: get_startup_timeout(options), process_timeout: get_process_timeout(options), @@ -168,11 +187,16 @@ defmodule GenMagic.Server do end @doc false - def starting(:enter, _, %{request: nil, port: nil} = data) do + def starting(:enter, _, %{port: nil} = data) do port = Port.open(data.port_name, data.port_options) {:keep_state, %{data | port: port}, data.startup_timeout} end + @doc false + def starting(:enter, _, data) do + {:keep_state_and_data, data.startup_timeout} + end + @doc false def starting({:call, from}, :status, data) do handle_status_call(from, :starting, data) @@ -184,10 +208,94 @@ defmodule GenMagic.Server do end @doc false - def starting(:info, {port, {:data, "ok\n"}}, %{port: port} = data) do + def starting(:info, {port, {:data, ready}}, %{port: port} = data) do + case :erlang.binary_to_term(ready) do + :ready -> {:next_state, :loading, data} + end + end + + @doc false + def starting(:info, {port, {:exit_status, code}}, %{port: port} = data) do + error = + case code do + 1 -> :bad_db + 2 -> :ei_error + 3 -> :ei_bad_term + code -> {:unexpected_error, code} + end + + {:stop, {:error, error}, data} + end + + @doc false + def loading(:enter, _old_state, data) do + databases = + Enum.flat_map(List.wrap(data.database_patterns || @database_patterns), fn + :default -> [:default] + pattern -> Path.wildcard(pattern) + end) + + databases = + if databases == [] do + [:default] + else + databases + end + + {:keep_state, {databases, data}, {:state_timeout, 0, :load}} + end + + @doc false + def loading(:state_timeout, :load_timeout, {[database | _], data}) do + {:stop, {:error, {:database_loading_timeout, database}}, data} + end + + @doc false + def loading(:state_timeout, :load, {[], data}) do {:next_state, :available, data} end + @doc false + def loading(:state_timeout, :load, {[database | databases], data} = state) do + command = + case database do + :default -> {:add_default_database, nil} + path -> {:add_database, database} + end + + send(data.port, command) + {:keep_state, state, {:state_timeout, data.startup_timeout, :load_timeout}} + end + + @doc false + def loading(:info, {port, {:data, response}}, {[database | databases], %{port: port} = data}) do + case :erlang.binary_to_term(response) do + {:ok, :loaded} -> + {:keep_state, {databases, data}, {:state_timeout, 0, :load}} + end + end + + @doc false + def loading(:info, {port, {:exit_status, 1}}, {[database | _], %{port: port} = data}) do + {:stop, {:error, {:database_not_found, database}}, data} + end + + @doc false + def loading({:call, from}, :status, {[database | _], data}) do + handle_status_call(from, :loading, data) + end + + @doc false + def loading({:call, _from}, {:perform, _path}, _data) do + {:keep_state_and_data, :postpone} + end + + @doc false + def available(:enter, _old_state, %{request: {:reload, from, _}}) do + response = {:reply, from, :ok} + {:keep_state_and_data, response} + end + @doc false def available(:enter, _old_state, %{request: nil}) do :keep_state_and_data @@ -196,10 +304,39 @@ defmodule GenMagic.Server do @doc false def available({:call, from}, {:perform, path}, data) do data = %{data | cycles: data.cycles + 1, request: {path, from, :erlang.now()}} - _ = send(data.port, {self(), {:command, "file; " <> path <> "\n"}}) + + arg = + case path do + path when is_binary(path) -> {:file, path} + {:bytes, bytes} -> {:bytes, bytes} + end + + send(data.port, arg) {:next_state, :processing, data} end + @doc false + def available({:call, from}, {:reload, databases}, data) do + send(data.port, {:reload, :reload}) + + {:next_state, :starting, + %{ + data + | database_patterns: databases || data.database_patterns, + request: {:reload, from, :reload} + }} + end + + @doc false + def available({:call, from}, {:recycle, databases}, data) do + {:next_state, :recycling, + %{ + data + | database_patterns: databases || data.database_patterns, + request: {:reload, from, :recycle} + }} + end + @doc false def available({:call, from}, :status, data) do handle_status_call(from, :available, data) @@ -221,18 +358,22 @@ defmodule GenMagic.Server do end @doc false - def processing(:info, {port, {:data, response}}, %{port: port} = data) do - {_, from, _} = data.request - data = %{data | request: nil} + def processing(:state_timeout, _, %{port: port, request: {_, from, _}} = data) do + response = {:reply, from, {:error, :timeout}} + {:next_state, :recycling, %{data | request: nil}, [response, :hibernate]} + end + + @doc false + def processing(:info, {port, {:data, response}}, %{port: port, request: {_, from, _}} = data) do response = {:reply, from, handle_response(response)} next_state = (data.cycles >= data.recycle_threshold && :recycling) || :available - {:next_state, next_state, data, response} + {:next_state, next_state, %{data | request: nil}, [response, :hibernate]} end @doc false - def recycling(:enter, _, %{request: nil, port: port} = data) when is_port(port) do - _ = send(data.port, {self(), :close}) - {:keep_state_and_data, data.startup_timeout} + def recycling(:enter, _, %{port: port} = data) when is_port(port) do + send(data.port, {:stop, :recycle}) + {:keep_state_and_data, {:state_timeout, data.startup_timeout, :stop}} end @doc false @@ -245,20 +386,60 @@ defmodule GenMagic.Server do handle_status_call(from, :recycling, data) end + @doc false + # In case of timeout, force close. + def recycling(:state_timeout, :stop, data) do + Kernel.send(data.port, {self(), :close}) + {:keep_state_and_data, {:state_timeout, data.startup_timeout, :close}} + end + + @doc false + def recycling(:state_timeout, :close, data) do + {:stop, {:error, :port_close_failed}} + end + @doc false def recycling(:info, {port, :closed}, %{port: port} = data) do {:next_state, :starting, %{data | port: nil, cycles: 0}} end - defp handle_response("ok; " <> message) do - case message |> String.trim() |> String.split("\t") do - [mime_type, encoding, content] -> {:ok, Result.build(mime_type, encoding, content)} - _ -> {:error, :malformed_response} - end + @doc false + def recycling(:info, {port, {:exit_status, _}}, %{port: port} = data) do + {:next_state, :starting, %{data | port: nil, cycles: 0}} end - defp handle_response("error; " <> message) do - {:error, String.trim(message)} + @doc false + @impl :gen_statem + def terminate(_, _, %{port: port}) do + Kernel.send(port, {self(), :close}) + end + + @doc false + def terminate(_, _, _) do + :ok + end + + defp send(port, command) do + Kernel.send(port, {self(), {:command, :erlang.term_to_binary(command)}}) + end + + @errnos %{ + 2 => :enoent, + 13 => :eaccess, + 20 => :enotdir, + 12 => :enomem, + 24 => :emfile, + 36 => :enametoolong + } + @errno Map.keys(@errnos) + + defp handle_response(data) do + case :erlang.binary_to_term(data) do + {:ok, {mime_type, encoding, content}} -> {:ok, Result.build(mime_type, encoding, content)} + {:error, {errno, _}} when errno in @errno -> {:error, @errnos[errno]} + {:error, {errno, string}} -> {:error, "#{errno}: #{string}"} + {:error, _} = error -> error + end end defp handle_status_call(from, state, data) do diff --git a/lib/gen_magic/server/data.ex b/lib/gen_magic/server/data.ex index 6836327..25d23e9 100644 --- a/lib/gen_magic/server/data.ex +++ b/lib/gen_magic/server/data.ex @@ -21,5 +21,6 @@ defmodule GenMagic.Server.Data do process_timeout: :infinity, recycle_threshold: :infinity, cycles: 0, + database_patterns: nil, request: nil end diff --git a/src/apprentice.c b/src/apprentice.c index 8105b14..da8afaf 100644 --- a/src/apprentice.c +++ b/src/apprentice.c @@ -2,283 +2,424 @@ // The Sorcerer’s Apprentice // // To use this program, compile it with dynamically linked libmagic, as mirrored -// at https://github.com/threatstack/libmagic. You may install it with apt-get, yum or brew. -// Refer to the Makefile for further reference. +// at https://github.com/file/file. You may install it with apt-get, +// yum or brew. Refer to the Makefile for further reference. // -// This program is designed to run interactively as a backend daemon to the GenMagic library, -// and follows the command line pattern: +// This program is designed to run interactively as a backend daemon to the +// GenMagic library. // -// $ apprentice --database-file --database-default +// Communication is done over STDIN/STDOUT as binary packets of 2 bytes length +// plus X bytes payload, where the payload is an erlang term encoded with +// :erlang.term_to_binary/1 and decoded with :erlang.binary_to_term/1. // -// Where each argument either refers to a compiled or uncompiled magic database, or the default -// database. They will be loaded in the sequence that they were specified. Note that you must -// specify at least one database. +// Once the program is ready, it sends the `:ready` atom. // -// Once the program starts, it will print info statements if run from a terminal, then it will -// print `ok`. From this point onwards, additional commands can be passed: -// -// file; +// It is then up to the Erlang side to load databases, by sending messages: +// - `{:add_database, path}` +// - `{:add_default_database, _}` // -// Results will be printed tab-separated, e.g.: +// If the requested database have been loaded, an `{:ok, :loaded}` message will +// follow. Otherwise, the process will exit (exit code 1). // -// ok; application/zip binary Zip archive data, at least v1.0 to extract - +// Commands are sent to the program STDIN as an erlang term of `{Operation, +// Argument}`, and response of `{:ok | :error, Response}`. +// +// Invalid packets will cause the program to exit (exit code 3). This will +// happen if your Erlang Term format doesn't match the version the program has +// been compiled with, or if you send a command too huge. +// +// The program may exit with exit code 3 if something went wrong with ei_* +// functions. +// +// Commands: +// {:reload, _} :: :ready +// {:add_database, String.t()} :: {:ok, _} | {:error, _} +// {:add_default_database, _} :: {:ok, _} | {:error, _} +// {:file, path :: String.t()} :: {:ok, {type, encoding, name}} | {:error, +// :badarg} | {:error, {errno :: integer(), String.t()}} +// {:bytes, binary()} :: same as :file +// {:stop, reason :: atom()} :: exit 0 + +#include +#include #include #include #include +#include #include #include #include #include -#include #include +#include #include -#include - -#define USAGE "[--database-file | --database-default, ...]" -#define DELIMITER "\t" #define ERROR_OK 0 -#define ERROR_NO_DATABASE 1 -#define ERROR_NO_ARGUMENT 2 -#define ERROR_MISSING_DATABASE 3 +#define ERROR_DB 1 +#define ERROR_EI 2 +#define ERROR_BAD_TERM 3 -#define ANSI_INFO "\x1b[37m" // gray -#define ANSI_OK "\x1b[32m" // green -#define ANSI_ERROR "\x1b[31m" // red -#define ANSI_IGNORE "\x1b[90m" // red -#define ANSI_RESET "\x1b[0m" +// We use a bigger than possible valid command length (around 4111 bytes) to +// allow more precise errors when using too long paths. +#define COMMAND_LEN 8000 +#define COMMAND_BUFFER_SIZE COMMAND_LEN + 1 -#define MAGIC_FLAGS_COMMON (MAGIC_CHECK|MAGIC_ERROR) +#define MAGIC_FLAGS_COMMON (MAGIC_CHECK | MAGIC_ERROR) magic_t magic_setup(int flags); +#define EI_ENSURE(result) \ + do { \ + if (result != 0) { \ + fprintf(stderr, "EI ERROR, line: %d", __LINE__); \ + exit(ERROR_EI); \ + } \ + } while (0); + +typedef char byte; + void setup_environment(); -void setup_options(int argc, char **argv); -void setup_options_file(char *optarg); -void setup_options_default(); -void setup_system(); -void process_line(char *line); -void process_file(char *path); -void print_info(const char *format, ...); -void print_ok(const char *format, ...); -void print_error(const char *format, ...); - -struct magic_file { - struct magic_file *prev; - struct magic_file *next; - char *path; -}; - -static struct magic_file* magic_database; -static magic_t magic_mime_type; // MAGIC_MIME_TYPE +void magic_open_all(); +int magic_load_all(char *path); +int process_command(uint16_t len, byte *buf); +void process_file(char *path, ei_x_buff *result); +void process_bytes(char *bytes, int size, ei_x_buff *result); +size_t read_cmd(byte *buf); +size_t write_cmd(byte *buf, size_t len); +void error(ei_x_buff *result, const char *error); +void handle_magic_error(magic_t handle, int errn, ei_x_buff *result); +void fdseek(uint16_t count); + +static magic_t magic_mime_type; // MAGIC_MIME_TYPE static magic_t magic_mime_encoding; // MAGIC_MIME_ENCODING -static magic_t magic_type_name; // MAGIC_NONE +static magic_t magic_type_name; // MAGIC_NONE -int main (int argc, char **argv) { +int main(int argc, char **argv) { + EI_ENSURE(ei_init()); setup_environment(); - setup_options(argc, argv); - setup_system(); - printf("ok\n"); - fflush(stdout); - - char line[4096]; - while (fgets(line, 4096, stdin)) { - process_line(line); + magic_open_all(); + + byte buf[COMMAND_BUFFER_SIZE]; + uint16_t len; + while ((len = read_cmd(buf)) > 0) { + process_command(len, buf); } - return 0; + return 255; } -void setup_environment() { - opterr = 0; -} +int process_command(uint16_t len, byte *buf) { + ei_x_buff result; + char atom[128]; + int index, version, arity, termtype, termsize; + index = 0; -void setup_options(int argc, char **argv) { - const char *option_string = "f:"; - static struct option long_options[] = { - {"database-file", required_argument, 0, 'f'}, - {"database-default", no_argument, 0, 'd'}, - {0, 0, 0, 0} - }; - - int option_character; - while (1) { - int option_index = 0; - option_character = getopt_long(argc, argv, option_string, long_options, &option_index); - if (-1 == option_character) { - break; - } - switch (option_character) { - case 'f': { - setup_options_file(optarg); - break; - } - case 'd': { - setup_options_default(); - break; - } - case '?': - default: { - print_info("%s %s\n", basename(argv[0]), USAGE); - exit(ERROR_NO_ARGUMENT); - break; - } - } + // Initialize result + EI_ENSURE(ei_x_new_with_version(&result)); + EI_ENSURE(ei_x_encode_tuple_header(&result, 2)); + + if (len >= COMMAND_LEN) { + error(&result, "badarg"); + return 1; } -} -void setup_options_file(char *optarg) { - print_info("Requested database %s", optarg); - if (0 != access(optarg, R_OK)) { - print_error("Missing Database"); - exit(ERROR_MISSING_DATABASE); + if (ei_decode_version(buf, &index, &version) != 0) { + exit(ERROR_BAD_TERM); } - struct magic_file *next = malloc(sizeof(struct magic_file)); - size_t path_length = strlen(optarg) + 1; - char *path = malloc(path_length); - memcpy(path, optarg, path_length); - next->path = path; - next->prev = magic_database; - if (magic_database) { - magic_database->next = next; + if (ei_decode_tuple_header(buf, &index, &arity) != 0) { + error(&result, "badarg"); + return 1; } - magic_database = next; -} -void setup_options_default() { - print_info("requested default database"); + if (arity != 2) { + error(&result, "badarg"); + return 1; + } - struct magic_file *next = malloc(sizeof(struct magic_file)); - next->path = NULL; - next->prev = magic_database; - if (magic_database) { - magic_database->next = next; + if (ei_decode_atom(buf, &index, atom) != 0) { + error(&result, "badarg"); + return 1; + } + + // {:file, path} + if (strlen(atom) == 4 && strncmp(atom, "file", 4) == 0) { + char path[4097]; + ei_get_type(buf, &index, &termtype, &termsize); + + if (termtype == ERL_BINARY_EXT) { + if (termsize < 4096) { + long bin_length; + EI_ENSURE(ei_decode_binary(buf, &index, path, &bin_length)); + path[termsize] = '\0'; + process_file(path, &result); + } else { + error(&result, "enametoolong"); + return 1; + } + } else { + error(&result, "badarg"); + return 1; + } + // {:bytes, bytes} + } else if (strlen(atom) == 5 && strncmp(atom, "bytes", 5) == 0) { + int termtype; + int termsize; + char bytes[51]; + EI_ENSURE(ei_get_type(buf, &index, &termtype, &termsize)); + + if (termtype == ERL_BINARY_EXT && termsize < 50) { + long bin_length; + EI_ENSURE(ei_decode_binary(buf, &index, bytes, &bin_length)); + bytes[termsize] = '\0'; + process_bytes(bytes, termsize, &result); + } else { + error(&result, "badarg"); + return 1; + } + // {:add_database, path} + } else if (strlen(atom) == 12 && strncmp(atom, "add_database", 12) == 0) { + char path[4097]; + ei_get_type(buf, &index, &termtype, &termsize); + + if (termtype == ERL_BINARY_EXT) { + if (termsize < 4096) { + long bin_length; + EI_ENSURE(ei_decode_binary(buf, &index, path, &bin_length)); + path[termsize] = '\0'; + if (magic_load_all(path) == 0) { + EI_ENSURE(ei_x_encode_atom(&result, "ok")); + EI_ENSURE(ei_x_encode_atom(&result, "loaded")); + } else { + exit(ERROR_DB); + } + } else { + error(&result, "enametoolong"); + return 1; + } + } else { + error(&result, "badarg"); + return 1; + } + // {:add_default_database, _} + } else if (strlen(atom) == 20 && + strncmp(atom, "add_default_database", 20) == 0) { + if (magic_load_all(NULL) == 0) { + EI_ENSURE(ei_x_encode_atom(&result, "ok")); + EI_ENSURE(ei_x_encode_atom(&result, "loaded")); + } else { + exit(ERROR_DB); + } + // {:reload, _} + } else if (strlen(atom) == 6 && strncmp(atom, "reload", 6) == 0) { + magic_open_all(); + return 0; + // {:stop, _} + } else if (strlen(atom) == 4 && strncmp(atom, "stop", 4) == 0) { + exit(ERROR_OK); + // badarg + } else { + error(&result, "badarg"); + return 1; } - magic_database = next; -} -void setup_system() { - magic_mime_encoding = magic_setup(MAGIC_FLAGS_COMMON|MAGIC_MIME_ENCODING); - magic_mime_type = magic_setup(MAGIC_FLAGS_COMMON|MAGIC_MIME_TYPE); - magic_type_name = magic_setup(MAGIC_FLAGS_COMMON|MAGIC_NONE); + write_cmd(result.buff, result.index); + + EI_ENSURE(ei_x_free(&result)); + return 0; } -magic_t magic_setup(int flags) { - print_info("starting libmagic instance for flags %i", flags); +void setup_environment() { opterr = 0; } - magic_t magic = magic_open(flags); - struct magic_file *current_database = magic_database; - if (!current_database) { - print_error("no database configured"); - exit(ERROR_NO_DATABASE); +void magic_open_all() { + if (magic_mime_encoding) { + magic_close(magic_mime_encoding); } + if (magic_mime_type) { + magic_close(magic_mime_type); + } + if (magic_type_name) { + magic_close(magic_type_name); + } + magic_mime_encoding = magic_open(MAGIC_FLAGS_COMMON | MAGIC_MIME_ENCODING); + magic_mime_type = magic_open(MAGIC_FLAGS_COMMON | MAGIC_MIME_TYPE); + magic_type_name = magic_open(MAGIC_FLAGS_COMMON | MAGIC_NONE); + + ei_x_buff ok_buf; + EI_ENSURE(ei_x_new_with_version(&ok_buf)); + EI_ENSURE(ei_x_encode_atom(&ok_buf, "ready")); + write_cmd(ok_buf.buff, ok_buf.index); + EI_ENSURE(ei_x_free(&ok_buf)); +} - while (current_database->prev) { - current_database = current_database->prev; +int magic_load_all(char *path) { + int res; + + if ((res = magic_load(magic_mime_encoding, path)) != 0) { + return res; } - while (current_database) { - if (isatty(STDERR_FILENO)) { - fprintf(stderr, ANSI_IGNORE); - } - if (!current_database->path) { - print_info("loading default database"); - } else { - print_info("loading database %s", current_database->path); - } - magic_load(magic, current_database->path); - if (isatty(STDERR_FILENO)) { - fprintf(stderr, ANSI_RESET); - } - current_database = current_database->next; + if ((res = magic_load(magic_mime_type, path)) != 0) { + return res; } - return magic; + if ((res = magic_load(magic_type_name, path)) != 0) { + return res; + } + return 0; } -void process_line(char *line) { - char path[4096]; +void process_bytes(char *path, int size, ei_x_buff *result) { + const char *mime_type_result = magic_buffer(magic_mime_type, path, size); + const int mime_type_errno = magic_errno(magic_mime_type); - if (0 == strcmp(line, "exit\n")) { - exit(ERROR_OK); + if (mime_type_errno > 0) { + handle_magic_error(magic_mime_type, mime_type_errno, result); + return; } - if (1 != sscanf(line, "file; %[^\n]s", path)) { - print_error("invalid commmand"); + + const char *mime_encoding_result = + magic_buffer(magic_mime_encoding, path, size); + int mime_encoding_errno = magic_errno(magic_mime_encoding); + + if (mime_encoding_errno > 0) { + handle_magic_error(magic_mime_encoding, mime_encoding_errno, result); return; } - if (0 != access(path, R_OK)) { - print_error("unable to access file"); + const char *type_name_result = magic_buffer(magic_type_name, path, size); + int type_name_errno = magic_errno(magic_type_name); + + if (type_name_errno > 0) { + handle_magic_error(magic_type_name, type_name_errno, result); return; } - process_file(path); + EI_ENSURE(ei_x_encode_atom(result, "ok")); + EI_ENSURE(ei_x_encode_tuple_header(result, 3)); + EI_ENSURE( + ei_x_encode_binary(result, mime_type_result, strlen(mime_type_result))); + EI_ENSURE(ei_x_encode_binary(result, mime_encoding_result, + strlen(mime_encoding_result))); + EI_ENSURE( + ei_x_encode_binary(result, type_name_result, strlen(type_name_result))); + return; } -void process_file(char *path) { +void handle_magic_error(magic_t handle, int errn, ei_x_buff *result) { + const char *error = magic_error(handle); + EI_ENSURE(ei_x_encode_atom(result, "error")); + EI_ENSURE(ei_x_encode_tuple_header(result, 2)); + long errlon = (long)errn; + EI_ENSURE(ei_x_encode_long(result, errlon)); + EI_ENSURE(ei_x_encode_binary(result, error, strlen(error))); + return; +} + +void process_file(char *path, ei_x_buff *result) { const char *mime_type_result = magic_file(magic_mime_type, path); - const char *mime_type_error = magic_error(magic_mime_type); - const char *mine_encoding_result = magic_file(magic_mime_encoding, path); - const char *mine_encoding_error = magic_error(magic_mime_encoding); - const char *type_name_result = magic_file(magic_type_name, path); - const char *type_name_error = magic_error(magic_type_name); + const int mime_type_errno = magic_errno(magic_mime_type); - if (mime_type_error) { - print_error(mime_type_error); + if (mime_type_errno > 0) { + handle_magic_error(magic_mime_type, mime_type_errno, result); return; } - if (mine_encoding_error) { - print_error(mine_encoding_error); + const char *mime_encoding_result = magic_file(magic_mime_encoding, path); + int mime_encoding_errno = magic_errno(magic_mime_encoding); + + if (mime_encoding_errno > 0) { + handle_magic_error(magic_mime_encoding, mime_encoding_errno, result); return; } - if (type_name_error) { - print_error(type_name_error); + const char *type_name_result = magic_file(magic_type_name, path); + int type_name_errno = magic_errno(magic_type_name); + + if (type_name_errno > 0) { + handle_magic_error(magic_type_name, type_name_errno, result); return; } - print_ok("%s%s%s%s%s", mime_type_result, DELIMITER, mine_encoding_result, DELIMITER, type_name_result); + EI_ENSURE(ei_x_encode_atom(result, "ok")); + EI_ENSURE(ei_x_encode_tuple_header(result, 3)); + EI_ENSURE( + ei_x_encode_binary(result, mime_type_result, strlen(mime_type_result))); + EI_ENSURE(ei_x_encode_binary(result, mime_encoding_result, + strlen(mime_encoding_result))); + EI_ENSURE( + ei_x_encode_binary(result, type_name_result, strlen(type_name_result))); + return; } -void print_info(const char *format, ...) { - if (!isatty(STDOUT_FILENO)) { - return; - } +// Adapted from https://erlang.org/doc/tutorial/erl_interface.html +// Changed `read_cmd`, the original one was buggy given some length (due to +// endinaness). +// TODO: Check if `write_cmd` exhibits the same issue. +size_t read_exact(byte *buf, size_t len) { + int i, got = 0; + + do { + if ((i = read(0, buf + got, len - got)) <= 0) { + return (i); + } + got += i; + } while (got < len); - printf(ANSI_INFO "info; " ANSI_RESET); - va_list arguments; - va_start(arguments, format); - vprintf(format, arguments); - va_end(arguments); - printf("\n"); + return (len); } -void print_ok(const char *format, ...) { - if (isatty(STDOUT_FILENO)) { - printf(ANSI_OK "ok; " ANSI_RESET); - } else { - printf("ok; "); - } +size_t write_exact(byte *buf, size_t len) { + int i, wrote = 0; + + do { + if ((i = write(1, buf + wrote, len - wrote)) <= 0) + return (i); + wrote += i; + } while (wrote < len); - va_list arguments; - va_start(arguments, format); - vprintf(format, arguments); - va_end(arguments); - printf("\n"); - fflush(stdout); + return (len); } -void print_error(const char *format, ...) { - if (isatty(STDERR_FILENO)) { - fprintf(stderr, ANSI_ERROR "error; " ANSI_RESET); - } else { - fprintf(stderr, "error; "); +size_t read_cmd(byte *buf) { + int i; + if ((i = read(0, buf, sizeof(uint16_t))) <= 0) { + return (i); + } + uint16_t len16 = *(uint16_t *)buf; + len16 = ntohs(len16); + + // Buffer isn't large enough: just return possible len, without reading. + // Up to the caller of verifying the size again and return an error. + // buf left unchanged, stdin emptied of X bytes. + if (len16 > COMMAND_LEN) { + fdseek(len16); + return len16; } - va_list arguments; - va_start(arguments, format); - vfprintf(stderr, format, arguments); - va_end(arguments); - fprintf(stderr, "\n"); - fflush(stderr); + return read_exact(buf, len16); +} + +size_t write_cmd(byte *buf, size_t len) { + byte li; + + li = (len >> 8) & 0xff; + write_exact(&li, 1); + + li = len & 0xff; + write_exact(&li, 1); + + return write_exact(buf, len); +} + +void error(ei_x_buff *result, const char *error) { + EI_ENSURE(ei_x_encode_atom(result, "error")); + EI_ENSURE(ei_x_encode_atom(result, error)); + write_cmd(result->buff, result->index); + EI_ENSURE(ei_x_free(result)); +} + +void fdseek(uint16_t count) { + int i = 0; + while (i < count) { + getchar(); + i += 1; + } } diff --git a/test/gen_magic/apprentice_test.exs b/test/gen_magic/apprentice_test.exs new file mode 100644 index 0000000..dfb86a0 --- /dev/null +++ b/test/gen_magic/apprentice_test.exs @@ -0,0 +1,165 @@ +defmodule GenMagic.ApprenticeTest do + use GenMagic.MagicCase + + @tmp_path "/tmp/testgenmagicx" + require Logger + + test "sends ready" do + port = Port.open(GenMagic.Config.get_port_name(), GenMagic.Config.get_port_options([])) + on_exit(fn -> send(port, {self(), :close}) end) + assert_ready_and_init_default(port) + end + + test "stops" do + port = Port.open(GenMagic.Config.get_port_name(), GenMagic.Config.get_port_options([])) + on_exit(fn -> send(port, {self(), :close}) end) + assert_ready_and_init_default(port) + send(port, {self(), {:command, :erlang.term_to_binary({:stop, :stop})}}) + assert_receive {^port, {:exit_status, 0}} + end + + test "exits with non existent database with an error" do + opts = [:use_stdio, :binary, :exit_status, {:packet, 2}, {:args, []}] + port = Port.open(GenMagic.Config.get_port_name(), opts) + on_exit(fn -> send(port, {self(), :close}) end) + assert_ready(port) + + send( + port, + {self(), {:command, :erlang.term_to_binary({:add_database, "/somewhere/nowhere"})}} + ) + + assert_receive {^port, {:exit_status, 1}} + end + + describe "port" do + setup do + port = Port.open(GenMagic.Config.get_port_name(), GenMagic.Config.get_port_options([])) + on_exit(fn -> send(port, {self(), :close}) end) + assert_ready_and_init_default(port) + %{port: port} + end + + test "exits with badly formatted erlang terms", %{port: port} do + send(port, {self(), {:command, "i forgot to term_to_binary!!"}}) + assert_receive {^port, {:exit_status, 3}} + end + + test "errors with wrong command", %{port: port} do + send(port, {self(), {:command, :erlang.term_to_binary(:wrong)}}) + assert_receive {^port, {:data, data}} + assert {:error, :badarg} = :erlang.binary_to_term(data) + refute_receive _ + + send(port, {self(), {:command, :erlang.term_to_binary({:file, 42})}}) + assert_receive {^port, {:data, data}} + assert {:error, :badarg} = :erlang.binary_to_term(data) + refute_receive _ + + send(port, {self(), {:command, :erlang.term_to_binary("more wrong")}}) + assert_receive {^port, {:data, data}} + assert {:error, :badarg} = :erlang.binary_to_term(data) + refute_receive _ + + send(port, {self(), {:command, :erlang.term_to_binary({"no", "no"})}}) + assert_receive {^port, {:data, data}} + assert {:error, :badarg} = :erlang.binary_to_term(data) + refute_receive _ + end + + test "file works", %{port: port} do + send(port, {self(), {:command, :erlang.term_to_binary({:file, Path.expand("Makefile")})}}) + assert_receive {^port, {:data, data}} + assert {:ok, _} = :erlang.binary_to_term(data) + end + + test "bytes works", %{port: port} do + send(port, {self(), {:command, :erlang.term_to_binary({:bytes, "some bytes!"})}}) + assert_receive {^port, {:data, data}} + assert {:ok, _} = :erlang.binary_to_term(data) + end + + test "fails with non existent file", %{port: port} do + send(port, {self(), {:command, :erlang.term_to_binary({:file, "/path/to/nowhere"})}}) + assert_receive {^port, {:data, data}} + assert {:error, _} = :erlang.binary_to_term(data) + end + + test "works with big file path", %{port: port} do + # Test with longest valid path. + {dir, bigfile} = too_big(@tmp_path, "/a") + + case File.mkdir_p(dir) do + :ok -> + File.touch!(bigfile) + on_exit(fn -> File.rm_rf!(@tmp_path) end) + send(port, {self(), {:command, :erlang.term_to_binary({:file, bigfile})}}) + assert_receive {^port, {:data, data}} + assert {:ok, _} = :erlang.binary_to_term(data) + refute_receive _ + + # This path should be long enough for buffers, but larger than a valid path name. + # Magic will return an errno 36. + file = @tmp_path <> String.duplicate("a", 256) + send(port, {self(), {:command, :erlang.term_to_binary({:file, file})}}) + assert_receive {^port, {:data, data}} + assert {:error, {36, _}} = :erlang.binary_to_term(data) + refute_receive _ + # Theses filename should be too big for the path buffer. + file = bigfile <> "aaaaaaaaaa" + send(port, {self(), {:command, :erlang.term_to_binary({:file, file})}}) + assert_receive {^port, {:data, data}} + assert {:error, :enametoolong} = :erlang.binary_to_term(data) + refute_receive _ + # This call should be larger than the COMMAND_BUFFER_SIZE. Ensure nothing bad happens! + file = String.duplicate(bigfile, 4) + send(port, {self(), {:command, :erlang.term_to_binary({:file, file})}}) + assert_receive {^port, {:data, data}} + assert {:error, :badarg} = :erlang.binary_to_term(data) + refute_receive _ + # We re-run a valid call to ensure the buffer/... haven't been corrupted in port land. + send(port, {self(), {:command, :erlang.term_to_binary({:file, bigfile})}}) + assert_receive {^port, {:data, data}} + assert {:ok, _} = :erlang.binary_to_term(data) + refute_receive _ + + {:error, :enametoolong} -> + Logger.info( + "Skipping test, operating system does not support max POSIX length for directories" + ) + + :ignore + end + end + end + + def assert_ready(port) do + assert_receive {^port, {:data, data}} + assert :ready == :erlang.binary_to_term(data) + end + + def assert_ready_and_init_default(port) do + assert_receive {^port, {:data, data}} + assert :ready == :erlang.binary_to_term(data) + send(port, {self(), {:command, :erlang.term_to_binary({:add_default_database, nil})}}) + assert_receive {^port, {:data, data}} + assert {:ok, _} = :erlang.binary_to_term(data) + end + + def too_big(path, filename, limit \\ 4095) do + last_len = byte_size(filename) + path_len = byte_size(path) + needed = limit - (last_len + path_len) + extra = make_too_big(needed, "") + {path <> extra, path <> extra <> filename} + end + + def make_too_big(needed, acc) when needed <= 255 do + acc <> "/" <> String.duplicate("a", needed - 1) + end + + def make_too_big(needed, acc) do + acc = acc <> "/" <> String.duplicate("a", 254) + make_too_big(needed - 255, acc) + end +end diff --git a/test/gen_magic/gen_magic_test.exs b/test/gen_magic/gen_magic_test.exs index dbd287c..92574f4 100644 --- a/test/gen_magic/gen_magic_test.exs +++ b/test/gen_magic/gen_magic_test.exs @@ -24,7 +24,6 @@ defmodule GenMagicTest do end test "Non-existent file" do - Process.flag(:trap_exit, true) {:ok, pid} = GenMagic.Server.start_link([]) path = missing_filename() assert_no_file(GenMagic.Server.perform(pid, path)) @@ -41,13 +40,30 @@ defmodule GenMagicTest do assert "text/x-makefile" = result.mime_type end - test "Custom database file recognises Elixir files" do - database = absolute_path("test/elixir.mgc") - {:ok, pid} = GenMagic.Server.start_link(database_patterns: [database]) - path = absolute_path("mix.exs") - assert {:ok, %Result{} = result} = GenMagic.Server.perform(pid, path) - assert "text/x-elixir" = result.mime_type - assert "us-ascii" = result.encoding - assert "Elixir module source text" = result.content + describe "custom database" do + setup do + database = absolute_path("elixir.mgc") + on_exit(fn -> File.rm(database) end) + {_, 0} = System.cmd("file", ["-C", "-m", absolute_path("test/elixir")]) + [database: database] + end + + test "recognises Elixir files", %{database: database} do + {:ok, pid} = GenMagic.Server.start_link(database_patterns: [database]) + path = absolute_path("mix.exs") + assert {:ok, %Result{} = result} = GenMagic.Server.perform(pid, path) + assert "text/x-elixir" = result.mime_type + assert "us-ascii" = result.encoding + assert "Elixir module source text" = result.content + end + + test "recognises Elixir files after a reload", %{database: database} do + {:ok, pid} = GenMagic.Server.start_link([]) + path = absolute_path("mix.exs") + {:ok, %Result{mime_type: mime}} = GenMagic.Server.perform(pid, path) + refute mime == "text/x-elixir" + :ok = GenMagic.Server.reload(pid, [database]) + assert {:ok, %Result{mime_type: "text/x-elixir"}} = GenMagic.Server.perform(pid, path) + end end end diff --git a/test/gen_magic/server_test.exs b/test/gen_magic/server_test.exs index e152145..d00933a 100644 --- a/test/gen_magic/server_test.exs +++ b/test/gen_magic/server_test.exs @@ -12,6 +12,7 @@ defmodule GenMagic.ServerTest do assert {:ok, _} = GenMagic.Server.perform(pid, path) assert {:ok, %{cycles: 2}} = GenMagic.Server.status(pid) assert {:ok, _} = GenMagic.Server.perform(pid, path) + Process.sleep(100) assert {:ok, %{cycles: 0}} = GenMagic.Server.status(pid) end @@ -20,11 +21,24 @@ defmodule GenMagic.ServerTest do path = absolute_path("Makefile") assert {:ok, %{cycles: 0}} = GenMagic.Server.status(pid) assert {:ok, _} = GenMagic.Server.perform(pid, path) + Process.sleep(100) assert {:ok, %{cycles: 0}} = GenMagic.Server.status(pid) assert {:ok, _} = GenMagic.Server.perform(pid, path) + Process.sleep(100) assert {:ok, %{cycles: 0}} = GenMagic.Server.status(pid) assert {:ok, _} = GenMagic.Server.perform(pid, path) + Process.sleep(100) assert {:ok, %{cycles: 0}} = GenMagic.Server.status(pid) end end + + test "recycle" do + {:ok, pid} = GenMagic.Server.start_link([]) + path = absolute_path("Makefile") + assert {:ok, %{cycles: 0}} = GenMagic.Server.status(pid) + assert {:ok, _} = GenMagic.Server.perform(pid, path) + assert {:ok, %{cycles: 1}} = GenMagic.Server.status(pid) + assert :ok = GenMagic.Server.recycle(pid) + assert {:ok, %{cycles: 0}} = GenMagic.Server.status(pid) + end end diff --git a/test/support/magic_case.ex b/test/support/magic_case.ex index 4915861..53df01e 100644 --- a/test/support/magic_case.ex +++ b/test/support/magic_case.ex @@ -20,8 +20,8 @@ defmodule GenMagic.MagicCase do |> Stream.flat_map(&Enum.shuffle/1) end - def assert_no_file({:error, message}) do - assert "unable to access file" = message + def assert_no_file(message) do + assert {:error, :enoent} = message end def absolute_path(path) do