diff --git a/apps/csv2sql/lib/csv2sql/data_transfer.ex b/apps/csv2sql/lib/csv2sql/data_transfer.ex deleted file mode 100644 index 2c4f01f..0000000 --- a/apps/csv2sql/lib/csv2sql/data_transfer.ex +++ /dev/null @@ -1,56 +0,0 @@ -# defmodule Csv2sql.DataTransfer do -# alias NimbleCSV.RFC4180, as: CSV -# alias Csv2sql.{JobQueueServer, Helpers} - -# @doc """ -# Divides a csv file in chunks and place them in a job queue. -# Whenever a DB worker is free it will pick up a chunk from the queue -# and insert it in the database. -# """ -# def process_file(file) do -# Helpers.print_msg("Begin data tranfer for file: " <> Path.basename(file)) - -# insertion_chunk_size = -# Application.get_env(:csv2sql, Csv2sql.get_repo())[:insertion_chunk_size] - -# file -# |> File.stream!() -# |> CSV.parse_stream() -# |> Stream.chunk_every(insertion_chunk_size) -# |> Enum.each(fn data_chunk -> -# check_job_queue(file, data_chunk) -# end) - -# wait_for_file_transfer(file) -# end - -# # Wait until all chunks for the current file in the job queue has been processed -# # `:timer.sleep(300)` waits for the last chunk in queue to get inserted that is -# # if no, chunks were present on the job queue this means a DB worker has picked -# # up the chunk for insertion, so we wait for 300ms for the chunk to get inserted. -# defp wait_for_file_transfer(file) do -# if Csv2sql.JobQueueServer.job_for_file_present(file) do -# wait_for_file_transfer(file) -# else -# imported_csv_directory = -# Application.get_env(:csv2sql, Csv2sql.MainServer)[:imported_csv_directory] - -# :timer.sleep(300) -# File.rename(file, "#{imported_csv_directory}/#{Path.basename(file)}") -# Helpers.print_msg("Finished processing file: " <> Path.basename(file), :green) -# end -# end - -# # Wait until job queue has space for the next chunk -# # by recursively calling itself. -# defp check_job_queue(file, data_chunk) do -# job_count_limit = Application.get_env(:csv2sql, Csv2sql.get_repo())[:job_count_limit] -# job_count = JobQueueServer.get_job_count() - -# if job_count > job_count_limit do -# check_job_queue(file, data_chunk) -# else -# JobQueueServer.add_data_chunk(file, data_chunk) -# end -# end -# end diff --git a/apps/csv2sql/lib/csv2sql/database/database.ex b/apps/csv2sql/lib/csv2sql/database/database.ex index eee1ba8..5e5a737 100644 --- a/apps/csv2sql/lib/csv2sql/database/database.ex +++ b/apps/csv2sql/lib/csv2sql/database/database.ex @@ -100,13 +100,14 @@ defmodule Csv2sql.Database do ProgressTracker.update_row_count(path, length(data_chunk)) end + @spec encode_binary(binary()) :: String.t() def encode_binary(str) do if Helpers.get_config(:remove_illegal_characters) do # TODO: Check if utf8mb4 is supported https://github.com/tallakt/codepagex/issues/27 {:ok, str, replaced} = Codepagex.to_string(str, :iso_8859_1, Codepagex.replace_nonexistent(""), 0) - # TODO: fix this can slow down things + # TODO: fix this it can slow down things if replaced > 0, do: Logger.warn("[#{Process.get(:file)}] Replaced #{replaced} characters in binary data") @@ -116,6 +117,13 @@ defmodule Csv2sql.Database do end end + @spec string_column_type(non_neg_integer()) :: :text | {:varchar, non_neg_integer()} + def string_column_type(max_data_length) do + if max_data_length > varchar_limit(), + do: :text, + else: {:varchar, max_data_length} + end + # Callbacks to implement @callback type_mapping(type_map()) :: String.t() diff --git a/apps/csv2sql/lib/csv2sql/database/mysql.ex b/apps/csv2sql/lib/csv2sql/database/mysql.ex index e783cd0..470c6e1 100644 --- a/apps/csv2sql/lib/csv2sql/database/mysql.ex +++ b/apps/csv2sql/lib/csv2sql/database/mysql.ex @@ -17,8 +17,7 @@ defmodule Csv2sql.Database.MySql do type_map[:is_boolean] -> "BIT" type_map[:is_integer] -> "INT" type_map[:is_float] -> "DOUBLE" - type_map[:is_text] -> "LONGTEXT" - true -> "VARCHAR(#{varchar_limit()})" + true -> type_map[:max_data_length] |> string_column_type() |> get_string_column_type() end end @@ -54,6 +53,7 @@ defmodule Csv2sql.Database.MySql do @spec get_ordering_column_type :: String.t() def get_ordering_column_type(), do: "INT UNSIGNED" + # Private helpers defp to_date_string(%DateTime{} = datetime), do: datetime |> DateTime.to_date() |> to_datetime_string() @@ -63,4 +63,8 @@ defmodule Csv2sql.Database.MySql do do: datetime |> DateTime.to_string() |> String.trim_trailing("Z") defp to_datetime_string(val), do: val + + defp get_string_column_type(:text), do: "LONGTEXT" + defp get_string_column_type({:varchar, 0}), do: "VARCHAR(#{varchar_limit()})" + defp get_string_column_type({:varchar, size}), do: "VARCHAR(#{size})" end diff --git a/apps/csv2sql/lib/csv2sql/database/postgres.ex b/apps/csv2sql/lib/csv2sql/database/postgres.ex index e173a9e..6d3de2d 100644 --- a/apps/csv2sql/lib/csv2sql/database/postgres.ex +++ b/apps/csv2sql/lib/csv2sql/database/postgres.ex @@ -17,8 +17,7 @@ defmodule Csv2sql.Database.Postgres do type_map[:is_boolean] -> "BOOLEAN" type_map[:is_integer] -> "INT" type_map[:is_float] -> "NUMERIC(1000, 100)" - type_map[:is_text] -> "TEXT" - true -> "VARCHAR(#{varchar_limit()})" + true -> type_map[:max_data_length] |> string_column_type() |> get_string_column_type() end end @@ -71,4 +70,9 @@ defmodule Csv2sql.Database.Postgres do @impl Csv2sql.Database @spec get_ordering_column_type :: String.t() def get_ordering_column_type(), do: "INT" + + # Private helpers + defp get_string_column_type(:text), do: "TEXT" + defp get_string_column_type({:varchar, 0}), do: "VARCHAR(#{varchar_limit()})" + defp get_string_column_type({:varchar, size}), do: "VARCHAR(#{size})" end diff --git a/apps/csv2sql/lib/csv2sql/db_worker.ex b/apps/csv2sql/lib/csv2sql/db_worker.ex deleted file mode 100644 index 824d075..0000000 --- a/apps/csv2sql/lib/csv2sql/db_worker.ex +++ /dev/null @@ -1,31 +0,0 @@ -# defmodule Csv2sql.DbWorker do -# use GenServer -# alias Csv2sql.{JobQueueServer, Database} - -# def start_link(_) do -# GenServer.start_link(__MODULE__, :no_args) -# end - -# def init(_) do -# Process.send_after(self(), :start_new_db_work, 0) -# {:ok, nil} -# end - -# @doc """ -# Recursively requests the job queue for work(chunks of data) -# """ -# def handle_info(:start_new_db_work, _) do -# JobQueueServer.get_work() -# |> case do -# {file, data_chunk} -> -# Database.insert_data_chunk(file, data_chunk) - -# :no_work -> -# nil -# end - -# send(self(), :start_new_db_work) - -# {:noreply, nil} -# end -# end diff --git a/apps/csv2sql/lib/csv2sql/db_worker_supervisor.ex b/apps/csv2sql/lib/csv2sql/db_worker_supervisor.ex deleted file mode 100644 index 737fa38..0000000 --- a/apps/csv2sql/lib/csv2sql/db_worker_supervisor.ex +++ /dev/null @@ -1,15 +0,0 @@ -# defmodule Csv2sql.DbWorkerSupervisor do -# use DynamicSupervisor - -# def start_link(_) do -# DynamicSupervisor.start_link(__MODULE__, :no_args, name: __MODULE__) -# end - -# def init(:no_args) do -# DynamicSupervisor.init(strategy: :one_for_one) -# end - -# def add_worker() do -# {:ok, _pid} = DynamicSupervisor.start_child(__MODULE__, Csv2sql.DbWorker) -# end -# end diff --git a/apps/csv2sql/lib/csv2sql/error_tracker.ex b/apps/csv2sql/lib/csv2sql/error_tracker.ex deleted file mode 100644 index bcb1019..0000000 --- a/apps/csv2sql/lib/csv2sql/error_tracker.ex +++ /dev/null @@ -1,55 +0,0 @@ -# defmodule Csv2sql.ErrorTracker do -# use GenServer -# alias Csv2sql.{Observer, Helpers} - -# def register_supervisor(sup_pid) do -# GenServer.cast(:error_tracker, {:register_supervisor, sup_pid}) -# end - -# def add_error(error) do -# """ -# AN ERROR OCCURED AND FURTHER PROCESSING WAS STOPPED: - -# #{inspect(error)} -# """ -# |> Helpers.print_msg(:red) - -# Observer.change_stage(:error) - -# # Call genserver not cast since, we need the wait synchronously untill supervisor is stopped -# GenServer.call(:error_tracker, {:add_error, error}) -# end - -# def get_errors() do -# GenServer.call(:error_tracker, :get_errors) -# end - -# def reset_errors() do -# GenServer.cast(:error_tracker, :reset_state) -# end - -# def start_link(_) do -# GenServer.start_link(__MODULE__, :no_args, name: :error_tracker) -# end - -# def init(_) do -# {:ok, %{errors: [], sup_pid: nil}} -# end - -# def handle_call({:add_error, error}, _from, %{errors: errors, sup_pid: sup_pid}) do -# sup_pid && Supervisor.stop(sup_pid) -# {:reply, nil, %{errors: errors ++ [error], sup_pid: nil}} -# end - -# def handle_call(:get_errors, _from, %{errors: errors} = state) do -# {:reply, errors, state} -# end - -# def handle_cast({:register_supervisor, sup_pid}, state) do -# {:noreply, Map.put(state, :sup_pid, sup_pid)} -# end - -# def handle_cast(:reset_state, _state) do -# {:noreply, %{errors: [], sup_pid: nil}} -# end -# end diff --git a/apps/csv2sql/lib/csv2sql/import_validator.ex b/apps/csv2sql/lib/csv2sql/import_validator.ex deleted file mode 100644 index ec22821..0000000 --- a/apps/csv2sql/lib/csv2sql/import_validator.ex +++ /dev/null @@ -1,139 +0,0 @@ -# defmodule Csv2sql.ImportValidator do -# alias NimbleCSV.RFC4180, as: CSV -# alias Csv2sql.Helpers - -# @doc """ -# Validates import by comparing row count in csv vs row count in database -# """ -# def validate_import(file_list) do -# %{stats: {total, correct, incorrect}, incorrect_files: incorrect_files} = -# file_list -# |> Enum.reduce( -# %{stats: {0, 0, 0}, incorrect_files: []}, -# fn {_file_path, %Csv2sql.File{name: file, row_count: row_count}}, -# %{stats: {total, correct, incorrect}, incorrect_files: incorrect_files} -> -# Helpers.print_msg("Checking File: #{Path.basename(file)}", :yellow) - -# main_server_config = Application.get_env(:csv2sql, Csv2sql.MainServer) - -# src = -# if Application.get_env(:csv2sql, Csv2sql.Worker)[:set_insert_data], -# do: main_server_config[:imported_csv_directory], -# else: main_server_config[:source_csv_directory] - -# result = -# if validate_csv(file, row_count) do -# File.rename( -# "#{src}/#{file}.csv", -# "#{main_server_config[:validated_csv_directory]}/#{file}.csv" -# ) - -# %{ -# stats: {total + 1, correct + 1, incorrect}, -# incorrect_files: incorrect_files -# } -# else -# %{ -# stats: {total + 1, correct, incorrect + 1}, -# incorrect_files: incorrect_files ++ [file] -# } -# end - -# result -# end -# ) - -# show_validation_results(total, correct, incorrect, incorrect_files) -# {incorrect, incorrect_files} -# end - -# @doc """ -# Get row count in csv file -# """ -# def get_count_from_csv(file) do -# file -# |> File.stream!() -# |> CSV.parse_stream() -# |> Enum.count() -# end - -# defp validate_csv(file, row_count) do -# db_count = get_db_count(file) - -# white = IO.ANSI.white() - -# Helpers.print_msg("Count in csv: #{white}#{row_count}") -# Helpers.print_msg("Count in database: #{white}#{db_count}") - -# if row_count == db_count do -# """ -# Correct ! - -# """ -# |> Helpers.print_msg(:green) - -# true -# else -# """ -# Incorrect ! - -# """ -# |> Helpers.print_msg(:red) - -# false -# end -# end - -# defp get_db_count(file) do -# table_name = -# file -# |> Path.basename() -# |> String.trim_trailing(".csv") - -# require Ecto.Query - -# try do -# Ecto.Query.from(p in table_name, select: count("*")) -# |> Csv2sql.get_repo().one( -# prefix: -# if(Csv2sql.get_db_type() == :mysql, -# do: Application.get_env(:csv2sql, Csv2sql.get_repo())[:database_name] -# ) -# ) -# catch -# _, _ -> -# Helpers.print_msg("An exception occured !", :red) -# "#{IO.ANSI.red()}✗#{IO.ANSI.reset()}" -# -1 -# end -# end - -# defp show_validation_results(total, correct, incorrect, incorrect_files) do -# """ - -# #{IO.ANSI.underline()}Validation Completed:#{IO.ANSI.no_underline()} - -# """ -# |> Helpers.print_msg(:white) - -# white = IO.ANSI.white() - -# Helpers.print_msg("* Number of files checked: #{white}#{total}") -# Helpers.print_msg("* Number of files with correct count: #{white}#{correct}") -# Helpers.print_msg("* Number of files with incorrect count: #{white}#{incorrect}") - -# if incorrect > 0 do -# """ - - -# Files with incorrect count: -# """ -# |> Helpers.print_msg(:white) - -# incorrect_files -# |> Enum.each(fn file -> -# Helpers.print_msg("* #{Path.basename(file)}", :red) -# end) -# end -# end -# end diff --git a/apps/csv2sql/lib/csv2sql/job_queue_server.ex b/apps/csv2sql/lib/csv2sql/job_queue_server.ex deleted file mode 100644 index 123a708..0000000 --- a/apps/csv2sql/lib/csv2sql/job_queue_server.ex +++ /dev/null @@ -1,59 +0,0 @@ -# defmodule Csv2sql.JobQueueServer do -# use GenServer - -# def start_link(_) do -# GenServer.start_link(__MODULE__, :no_args, name: __MODULE__) -# end - -# def init(_) do -# {:ok, []} -# end - -# def add_data_chunk(file, data_chunk) do -# GenServer.cast(__MODULE__, {:add_new_data_chunk, file, data_chunk}) -# end - -# def get_work() do -# GenServer.call(__MODULE__, :get_work, :infinity) -# end - -# def get_job_count() do -# GenServer.call(__MODULE__, :get_job_count, :infinity) -# end - -# def job_for_file_present(file) do -# GenServer.call(__MODULE__, {:job_for_file_present, file}, :infinity) -# end - -# def handle_cast({:add_new_data_chunk, file, data_chunk}, state) do -# new_state = state ++ [{file, data_chunk}] -# {:noreply, new_state} -# end - -# def handle_call(:get_work, _from, state) do -# state -# |> case do -# [data | new_state] -> -# {:reply, data, new_state} - -# [] -> -# {:reply, :no_work, state} - -# nil -> -# {:reply, :no_work, state} -# end -# end - -# def handle_call(:get_job_count, _from, state) do -# {:reply, Enum.count(state), state} -# end - -# def handle_call({:job_for_file_present, file}, _from, state) do -# file_present = -# Enum.any?(state, fn {file_job, _data_chunk} -> -# file == file_job -# end) - -# {:reply, file_present, state} -# end -# end diff --git a/apps/csv2sql/lib/csv2sql/main_server.ex b/apps/csv2sql/lib/csv2sql/main_server.ex deleted file mode 100644 index 16836d6..0000000 --- a/apps/csv2sql/lib/csv2sql/main_server.ex +++ /dev/null @@ -1,151 +0,0 @@ -# defmodule Csv2sql.MainServer do -# use GenServer -# alias Csv2sql.{JobQueueServer, Database, Observer} - -# def start_link(_) do -# GenServer.start_link(__MODULE__, :no_args, name: __MODULE__) -# end - -# def work_done() do -# GenServer.cast(__MODULE__, :done) -# end - -# # Starts the server with initial state set to worker_count -# # The init function uses send_after to tell the runtime to queue a message to this -# # server immediately (that is, after waiting 0 ms). When the init function exits, the -# # server is then free to pick up this message, which triggers the handle_info -# # callback, and the workers get started -# def init(_) do -# make_directories_if_not_present() -# Process.send_after(self(), :kickoff, 0) -# worker_count = Application.get_env(:csv2sql, Csv2sql.MainServer)[:worker_count] -# {:ok, worker_count} -# end - -# def handle_info(:kickoff, worker_count) do -# worker_config = Application.get_env(:csv2sql, Csv2sql.Worker) -# main_server_config = Application.get_env(:csv2sql, Csv2sql.MainServer) - -# if worker_config[:set_insert_schema] || worker_config[:set_insert_data], -# do: Database.prepare_db() - -# if Application.get_env(:csv2sql, Csv2sql.Worker)[:set_make_schema], -# do: File.rm("#{main_server_config[:db_worker_count]}/schema.sql") - -# 1..worker_count -# |> Enum.each(fn _ -> Csv2sql.WorkerSupervisor.add_worker() end) - -# 1..main_server_config[:db_worker_count] -# |> Enum.each(fn _ -> Csv2sql.DbWorkerSupervisor.add_worker() end) - -# {:noreply, worker_count} -# end - -# def handle_cast(:done, 1) do -# Observer.update_active_worker_count(0) - -# wait_for_pending_jobs() - -# :timer.sleep(1000) - -# start_validation_message() - -# if Application.get_env(:csv2sql, Csv2sql.MainServer)[:set_validate] do -# Observer.change_stage(:validation) - -# Csv2sql.Helpers.print_msg("\nValidation Process Started...\n\n", :green) - -# %{file_list: file_list} = Csv2sql.Observer.get_stats() - -# Csv2sql.ImportValidator.validate_import(file_list) -# |> Csv2sql.Observer.update_validation_status() -# else -# Csv2sql.Observer.update_validation_status({:skipped, nil}) -# Csv2sql.Helpers.print_msg("\nValidation Process Skipped...\n\n", :green) -# end - -# pretty_print_time_take() - -# :timer.sleep(100) - -# Observer.change_stage(:finish) - -# {:noreply, 0} -# end - -# def handle_cast(:done, worker_count) do -# Csv2sql.Observer.update_active_worker_count(worker_count - 1) -# {:noreply, worker_count - 1} -# end - -# defp wait_for_pending_jobs() do -# if JobQueueServer.get_job_count() > 0, do: wait_for_pending_jobs(), else: nil -# end - -# defp pretty_print_time_take() do -# %{start_time: start_time} = Csv2sql.Observer.get_stats() - -# time_taken = -# DateTime.utc_now() -# |> Time.diff(start_time, :millisecond) -# |> Kernel./(1000) -# |> Float.round() - -# """ - -# ---------------------------------------- - -# FINISHED !!! - -# The operation took #{time_taken} seconds - - -# ---------------------------------------- - -# """ -# |> Csv2sql.Helpers.print_msg(:green) -# end - -# defp make_directories_if_not_present() do -# source_csv_directory = -# Application.get_env(:csv2sql, Csv2sql.MainServer)[:source_csv_directory] - -# imported_csv_directory = -# Application.get_env(:csv2sql, Csv2sql.MainServer)[:imported_csv_directory] - -# validated_csv_directory = -# Application.get_env(:csv2sql, Csv2sql.MainServer)[:validated_csv_directory] - -# if source_csv_directory && !File.exists?(source_csv_directory) do -# Csv2sql.Helpers.print_msg("ERROR: csv source directory does not exists !", :red) -# System.halt(0) -# end - -# if imported_csv_directory, do: File.mkdir(imported_csv_directory) - -# if validated_csv_directory, do: File.mkdir(validated_csv_directory) -# end - -# defp start_validation_message() do -# # Add line break -# IO.puts("----------------------------------------") -# IO.puts("") - -# CliSpinners.spin_fun( -# [ -# frames: :arrow2, -# text: "Finished importing CSVs...", -# done: "" -# ], -# fn -> -# CliSpinners.spin( -# frames: :clock, -# text: "Staring Validation Process...", -# done: "Staring Validation Process..." -# ) - -# :timer.sleep(3000) -# end -# ) -# end -# end diff --git a/apps/csv2sql/lib/csv2sql/schema_maker.ex b/apps/csv2sql/lib/csv2sql/schema_maker.ex deleted file mode 100644 index 1a6f5ef..0000000 --- a/apps/csv2sql/lib/csv2sql/schema_maker.ex +++ /dev/null @@ -1,267 +0,0 @@ -# defmodule Csv2sql.SchemaMaker do -# alias NimbleCSV.RFC4180, as: CSV -# alias Csv2sql.Observer - -# @doc """ -# Writes the DDL queries in file -# """ -# def make_schema(file_path) do -# [drop_query, create_query] = -# get_types("anc") -# |> get_ddl_queries(file_path) - -# query = """ - -# #{drop_query} - -# #{create_query} - -# """ - -# schema_file_path = Application.get_env(:csv2sql, Csv2sql.SchemaMaker)[:schema_file_path] -# File.write("#{schema_file_path}/schema.sql", query, [:append]) -# Csv2sql.Helpers.print_msg("Infer Schema for: #{Path.basename(file_path)}") -# [drop_query, create_query] -# end - -# @doc """ -# Check type of data -# """ -# def check_type(item, type) do -# item = String.trim(item) -# empty = is_empty?(item) - -# if empty do -# Map.put(type, :is_empty, type.is_empty && empty) -# else -# is_date = type.is_date && is_date?(item) -# is_datetime = type.is_datetime && is_datetime?(item) -# is_integer = type.is_integer && is_integer?(item) -# is_float = type.is_float && is_float?(item) -# is_boolean = type.is_boolean && is_boolean?(item) -# is_text = type.is_text || is_text?(item) - -# %{ -# is_empty: type.is_empty && empty, -# is_date: is_date, -# is_datetime: is_datetime, -# is_boolean: is_boolean, -# is_integer: is_integer, -# is_float: is_float, -# is_text: is_text -# } -# end -# end - -# def infer_type(chunk, headers_type_list) do -# Enum.reduce(chunk, headers_type_list, fn cols, type_list -> -# for {item, item_type_map} <- Enum.zip(cols, type_list) do -# check_type(item, item_type_map) -# end -# end) -# end - -# defp get_ddl_queries(types, file_path) do -# db_type = :mysql - -# database = -# if db_type == :postgres, -# do: "", -# else: "#{Application.get_env(:csv2sql, Csv2sql.get_repo())[:database_name]}." - -# table_name = -# if db_type == :postgres, -# do: "\"#{get_table_name(file_path)}\"", -# else: get_table_name(file_path) - -# create_table = -# types -# |> Enum.reduce( -# "CREATE TABLE #{database}#{table_name} (", -# fn {column_name, type}, query -> -# column_name = -# if db_type == :postgres, -# do: "\"#{column_name}\"", -# else: "`#{column_name}`" - -# query <> "#{column_name} #{type}, " -# end -# ) -# |> String.trim_trailing(", ") -# |> Kernel.<>(");") - -# ["DROP TABLE IF EXISTS #{database}#{table_name};", "#{create_table}"] -# end - -# def get_types(path) do -# headers = get_headers(path) -# varchar_limit = 200 -# headers_type_list = List.duplicate(get_type_map(), Enum.count(headers)) - -# schema_infer_chunk_size = 100 - -# db_type = :mysql - -# types = -# path -# |> File.stream!() -# |> CSV.parse_stream() -# |> Stream.chunk_every(schema_infer_chunk_size) -# |> Task.async_stream(__MODULE__, :infer_type, [headers_type_list], -# timeout: :infinity, -# ordered: false -# ) -# |> Enum.reduce(headers_type_list, fn {:ok, result}, acc -> -# # Here we get a list of type maps for each chunk of data -# # We need to merge theses type maps obtained from each chunk - -# for {acc_map, result_map} <- Enum.zip(acc, result) do -# %{ -# is_empty: acc_map.is_empty && result_map.is_empty, -# is_date: acc_map.is_date && result_map.is_date, -# is_datetime: acc_map.is_datetime && result_map.is_datetime, -# is_boolean: acc_map.is_boolean && result_map.is_boolean, -# is_integer: acc_map.is_integer && result_map.is_integer, -# is_float: acc_map.is_float && result_map.is_float, -# is_text: acc_map.is_text || result_map.is_text -# } -# end -# end) -# |> Enum.with_index() -# |> Enum.reduce(%{}, fn {type, index}, acc -> -# header = Enum.at(headers, index) - -# type = get_column_types(db_type, varchar_limit, type) - -# Map.put(acc, header, type) -# end) -# |> header_map_to_list(headers) - -# # Observer.set_schema(path, types) - -# types -# end - -# defp get_column_types(:postgres, varchar_limit, type) do -# cond do -# type[:is_empty] -> "VARCHAR(#{varchar_limit})" -# type[:is_boolean] -> "BOOLEAN" -# type[:is_integer] -> "INT" -# type[:is_float] -> "NUMERIC(1000, 100)" -# type[:is_text] -> "TEXT" -# true -> "VARCHAR(#{varchar_limit})" -# end -# end - -# defp get_column_types(:mysql, varchar_limit, type) do -# cond do -# type[:is_empty] -> "VARCHAR(#{varchar_limit})" -# type[:is_date] -> "DATE" -# type[:is_datetime] -> "DATETIME" -# type[:is_boolean] -> "BIT" -# type[:is_integer] -> "INT" -# type[:is_float] -> "DOUBLE" -# type[:is_text] -> "TEXT" -# true -> "VARCHAR(#{varchar_limit})" -# end -# end - -# defp get_headers(path) do -# [headers] = -# path -# |> File.stream!() -# |> Stream.take(1) -# |> CSV.parse_stream(skip_headers: false) -# |> Enum.to_list() - -# headers -# end - -# defp get_type_map() do -# %{ -# is_empty: true, -# is_date: true, -# is_datetime: true, -# is_boolean: true, -# is_integer: true, -# is_float: true, -# is_text: false -# } -# end - -# defp header_map_to_list(header_map, headers) do -# Enum.reduce(headers, [], fn header, acc -> -# acc ++ [{header, header_map[header]}] -# end) -# end - -# defp is_empty?(item) do -# item == "" -# end - -# defp is_date?(item) do -# Csv2sql.Helpers.get_config(:date_patterns) -# |> Enum.any?(fn pattern -> -# case Timex.parse(item, pattern) do -# {:ok, _} -> true -# {:error, _} -> false -# end -# end) -# end - -# defp is_datetime?(item) do -# Csv2sql.Helpers.get_config(:datetime_patterns) -# |> Enum.any?(fn pattern -> -# case Timex.parse(item, pattern) do -# {:ok, _} -> true -# {:error, _} -> false -# end -# end) -# end - -# defp is_boolean?(item) do -# if item in ["true", "false"] do -# true -# else -# item -# |> Integer.parse() -# |> case do -# {1, ""} -> true -# {0, ""} -> true -# _ -> false -# end -# end -# end - -# defp is_integer?(item) do -# case Integer.parse(item) do -# {item, ""} -> if item > -2_147_483_648 && item < 2_147_483_647, do: true, else: false -# _ -> false -# end -# end - -# defp is_float?(item) do -# try do -# case Float.parse(item) do -# {_, ""} -> -# true - -# _ -> -# false -# end -# rescue -# _e in ArgumentError -> false -# end -# end - -# defp is_text?(item) do -# varchar_limit = Csv2sql.Helpers.get_config(:varchar_limit) -# if String.length(item) > varchar_limit, do: true, else: false -# end - -# defp get_table_name(file_path) do -# file_path -# |> Path.basename() -# |> String.trim_trailing(".csv") -# end -# end diff --git a/apps/csv2sql/lib/csv2sql/type_deducer/type_checker.ex b/apps/csv2sql/lib/csv2sql/type_deducer/type_checker.ex index cc295dc..4cd6f23 100644 --- a/apps/csv2sql/lib/csv2sql/type_deducer/type_checker.ex +++ b/apps/csv2sql/lib/csv2sql/type_deducer/type_checker.ex @@ -21,7 +21,7 @@ defmodule Csv2sql.TypeDeducer.TypeChecker do is_boolean: existing_type_map.is_boolean && is_boolean?(item), is_integer: existing_type_map.is_integer && is_integer?(item), is_float: existing_type_map.is_float && is_float?(item), - is_text: existing_type_map.is_text || is_text?(item) + max_data_length: max(existing_type_map.max_data_length, String.length(item)) } end end @@ -72,11 +72,6 @@ defmodule Csv2sql.TypeDeducer.TypeChecker do end end - defp is_text?(item) do - varchar_limit = Helpers.get_config(:varchar_limit) - if String.length(item) > varchar_limit, do: true, else: false - end - defp parse_datetime_pattern(datetime_string, pattern) do case Timex.parse(datetime_string, pattern) do {:ok, _} -> true diff --git a/apps/csv2sql/lib/csv2sql/type_deducer/type_deducer.ex b/apps/csv2sql/lib/csv2sql/type_deducer/type_deducer.ex index 9a5addd..d1f8b7e 100644 --- a/apps/csv2sql/lib/csv2sql/type_deducer/type_deducer.ex +++ b/apps/csv2sql/lib/csv2sql/type_deducer/type_deducer.ex @@ -92,7 +92,7 @@ defmodule Csv2sql.TypeDeducer do is_boolean: true, is_integer: true, is_float: true, - is_text: false + max_data_length: 0 } |> List.duplicate(Enum.count(headers)) end @@ -106,7 +106,7 @@ defmodule Csv2sql.TypeDeducer do is_boolean: acc_map.is_boolean && current_map.is_boolean, is_integer: acc_map.is_integer && current_map.is_integer, is_float: acc_map.is_float && current_map.is_float, - is_text: acc_map.is_text || current_map.is_text + max_data_length: max(acc_map.max_data_length, current_map.max_data_length) } end end diff --git a/apps/csv2sql/lib/csv2sql/types/types.ex b/apps/csv2sql/lib/csv2sql/types/types.ex index 89ae375..0acd108 100644 --- a/apps/csv2sql/lib/csv2sql/types/types.ex +++ b/apps/csv2sql/lib/csv2sql/types/types.ex @@ -12,7 +12,7 @@ defmodule Csv2sql.Types do is_boolean: boolean(), is_integer: boolean(), is_float: boolean(), - is_text: boolean() + max_data_length: non_neg_integer() } @type csv_col_types_list() :: list({String.t(), String.t()}) diff --git a/apps/csv2sql/lib/csv2sql/worker.ex b/apps/csv2sql/lib/csv2sql/worker.ex deleted file mode 100644 index 6407bfc..0000000 --- a/apps/csv2sql/lib/csv2sql/worker.ex +++ /dev/null @@ -1,79 +0,0 @@ -# defmodule Csv2sql.Worker do -# use GenServer -# alias Csv2sql.{MainServer, Observer} - -# def start_link(_) do -# GenServer.start_link(__MODULE__, :no_args) -# end - -# def init(_) do -# Process.send_after( -# self(), -# {:start_new_work, Application.get_env(:csv2sql, Csv2sql.Worker)}, -# 0 -# ) - -# {:ok, nil} -# end - -# def handle_info( -# {:start_new_work, -# work_config = [ -# set_make_schema: set_make_schema, -# set_insert_schema: set_insert_schema, -# set_insert_data: set_insert_data -# ]}, -# _ -# ) do -# {file, row_count} = Observer.next_file() - -# if file do -# if set_make_schema do -# Observer.update_file_status(file, :infer_schema) -# result = make_schema(file) - -# if set_insert_schema do -# Observer.update_file_status(file, :insert_schema) -# insert_schema(result) -# end -# end - -# if set_insert_data && row_count != 0 do -# Observer.update_file_status(file, :insert_data) -# insert_data(file) -# end - -# if set_insert_data && row_count == 0, do: handle_empty_file(file) -# send(self(), {:start_new_work, work_config}) -# {:noreply, nil} -# else -# MainServer.work_done() -# {:noreply, nil} -# end -# end - -# defp make_schema(file) do -# queries = Csv2sql.SchemaMaker.make_schema(file) -# {file, queries} -# end - -# defp insert_schema({file, queries}) do -# Csv2sql.Database.make_db_schema(queries) -# file -# end - -# defp insert_data(file) do -# Csv2sql.DataTransfer.process_file(file) -# file -# end - -# # Handle csvs having 0 rows, change status to finish and move to imported directory -# defp handle_empty_file(file) do -# Observer.update_file_status(file, :finish) - -# File.rename!( -# file, -# "#{Application.get_env(:csv2sql, MainServer)[:imported_csv_directory]}/#{Path.basename(file)}" -# ) -# end -# end diff --git a/apps/csv2sql/lib/csv2sql/worker_supervisor.ex b/apps/csv2sql/lib/csv2sql/worker_supervisor.ex deleted file mode 100644 index 5be5c20..0000000 --- a/apps/csv2sql/lib/csv2sql/worker_supervisor.ex +++ /dev/null @@ -1,15 +0,0 @@ -# defmodule Csv2sql.WorkerSupervisor do -# use DynamicSupervisor - -# def start_link(_) do -# DynamicSupervisor.start_link(__MODULE__, :no_args, name: __MODULE__) -# end - -# def init(:no_args) do -# DynamicSupervisor.init(strategy: :one_for_one) -# end - -# def add_worker() do -# {:ok, _pid} = DynamicSupervisor.start_child(__MODULE__, Csv2sql.Worker) -# end -# end diff --git a/apps/csv2sql/test/database/database_test.exs b/apps/csv2sql/test/database/database_test.exs index 7f96d46..7c2705d 100644 --- a/apps/csv2sql/test/database/database_test.exs +++ b/apps/csv2sql/test/database/database_test.exs @@ -11,7 +11,7 @@ defmodule Csv2sql.DatabaseTest do is_boolean: false, is_integer: false, is_float: false, - is_text: false + max_data_length: 0 } setup_all do @@ -62,15 +62,13 @@ defmodule Csv2sql.DatabaseTest do is_datetime: "DATETIME", is_boolean: "BIT", is_integer: "INT", - is_float: "DOUBLE", - is_text: "LONGTEXT" + is_float: "DOUBLE" } db_type_mappings = if db_type == :postgres, do: Map.merge(db_type_mappings, %{ - is_text: "TEXT", is_datetime: "TIMESTAMP", is_boolean: "BOOLEAN", is_float: "NUMERIC(1000, 100)" @@ -78,8 +76,11 @@ defmodule Csv2sql.DatabaseTest do else: db_type_mappings @type_map - |> Enum.each(fn {key, _value} -> - updated_type_map = @type_map |> Map.put(key, true) + |> Map.keys() + |> Kernel.++([:max_data_length]) + |> Enum.each(fn key -> + type_value = if(key == :max_data_length, do: 0, else: true) + updated_type_map = Map.put(@type_map, key, type_value) assert Database.get_db_type(updated_type_map) == Map.get(db_type_mappings, key, "VARCHAR(#{varchar_limit})") diff --git a/apps/csv2sql/test/type_deducer/type_checker_test.exs b/apps/csv2sql/test/type_deducer/type_checker_test.exs index 22681b1..c1bbd6a 100644 --- a/apps/csv2sql/test/type_deducer/type_checker_test.exs +++ b/apps/csv2sql/test/type_deducer/type_checker_test.exs @@ -9,7 +9,7 @@ defmodule Csv2sql.Config.TypeCheckerTest do is_boolean: true, is_integer: true, is_float: true, - is_text: false + max_data_length: 0 } @date_patterns_to_tests [ @@ -60,7 +60,7 @@ defmodule Csv2sql.Config.TypeCheckerTest do is_boolean: false, is_integer: false, is_float: false, - is_text: false + max_data_length: String.length(value) } end) @@ -75,7 +75,7 @@ defmodule Csv2sql.Config.TypeCheckerTest do is_boolean: false, is_integer: false, is_float: false, - is_text: false + max_data_length: String.length(value) } end) end @@ -92,7 +92,7 @@ defmodule Csv2sql.Config.TypeCheckerTest do is_boolean: false, is_integer: false, is_float: false, - is_text: false + max_data_length: 0 } end) end @@ -116,7 +116,7 @@ defmodule Csv2sql.Config.TypeCheckerTest do is_boolean: false, is_integer: false, is_float: false, - is_text: false + max_data_length: String.length(value) } end) @@ -131,7 +131,7 @@ defmodule Csv2sql.Config.TypeCheckerTest do is_boolean: false, is_integer: false, is_float: false, - is_text: false + max_data_length: String.length(value) } end) end @@ -150,7 +150,7 @@ defmodule Csv2sql.Config.TypeCheckerTest do is_boolean: false, is_integer: false, is_float: false, - is_text: false + max_data_length: 0 } end ) @@ -229,11 +229,11 @@ defmodule Csv2sql.Config.TypeCheckerTest do ) [ - {"abc", false}, - {"12345678901", true}, - {"", false} + {"abc", 3}, + {"12345678901", 11}, + {"", 0} ] - |> run_assertions(:is_text) + |> run_assertions(:max_data_length) end # Private test helper functions @@ -259,6 +259,9 @@ defmodule Csv2sql.Config.TypeCheckerTest do {value, false} -> refute Map.get(TypeChecker.check_type(value, @initial_type_map), key) + + {value, number} -> + assert Map.get(TypeChecker.check_type(value, @initial_type_map), key) == number end) end end diff --git a/apps/csv2sql/test/type_deducer/type_deducer_test.exs b/apps/csv2sql/test/type_deducer/type_deducer_test.exs index e6b8389..105613e 100644 --- a/apps/csv2sql/test/type_deducer/type_deducer_test.exs +++ b/apps/csv2sql/test/type_deducer/type_deducer_test.exs @@ -22,7 +22,7 @@ defmodule Csv2sql.TypeDeducerTest do col_type_defs = [ {"id", "INT"}, - {"name", "VARCHAR(120)"}, + {"name", "VARCHAR(10)"}, {"description", if(db_type == :mysql, do: "LONGTEXT", else: "TEXT")}, {"salary", if(db_type == :mysql, do: "DOUBLE", else: "NUMERIC(1000, 100)")}, {"permanent", if(db_type == :mysql, do: "BIT", else: "BOOLEAN")}, diff --git a/apps/dashboard/lib/dashboard/config.ex b/apps/dashboard/lib/dashboard/config.ex index 237f4e2..14f7af2 100644 --- a/apps/dashboard/lib/dashboard/config.ex +++ b/apps/dashboard/lib/dashboard/config.ex @@ -145,8 +145,8 @@ defmodule DashBoard.Config do "C:/Users/#{username}/Desktop" {:unix, _} -> - {op, _exit_code} = System.cmd("eval", ["echo ~$USER"]) - String.trim(op) + {op, _exit_code} = System.shell("echo $USER") + "/home/#{String.trim(op)}" end end end