From 947020aadca7e2e5d1d43b0f1ed28052b72a2de7 Mon Sep 17 00:00:00 2001 From: tonextone Date: Sat, 28 Dec 2019 20:34:21 +0900 Subject: [PATCH 1/5] start --- zendesk-helpcenter-export.rb | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/zendesk-helpcenter-export.rb b/zendesk-helpcenter-export.rb index 99e7642..4b3f7a5 100755 --- a/zendesk-helpcenter-export.rb +++ b/zendesk-helpcenter-export.rb @@ -1,3 +1,4 @@ +# coding: utf-8 require 'rubygems' require 'httparty' require 'fileutils' @@ -110,6 +111,9 @@ def to_json! def create_table_of_contents! File.open("./index.html", "w+") { |f| f.puts main_overview_file } + + # ここで各階層の index.html を生成すれば良い。 + end # Section: Article content @@ -120,6 +124,9 @@ def article_html_content(article) # and replace all image links towards the local url regex_find = /https:\/\/.+?zendesk.com.+?article_attachments\/(\d+?)\/(.+)\.(.+?)" alt/ regex_replace = output_type == :slugified ? '\1-\2.\3" alt' : '\1.\3" alt' + + # ここでコンテンツを加工できる。 + boiler_plate_html do """

#{article['name']}

From b74728d64123002158a4db2a1ddcdbad75fef607 Mon Sep 17 00:00:00 2001 From: tonextone Date: Mon, 30 Dec 2019 15:50:09 +0900 Subject: [PATCH 2/5] done --- zendesk-helpcenter-export.rb | 123 ++++++++++++++++++++++++++--------- 1 file changed, 93 insertions(+), 30 deletions(-) diff --git a/zendesk-helpcenter-export.rb b/zendesk-helpcenter-export.rb index 4b3f7a5..548e880 100755 --- a/zendesk-helpcenter-export.rb +++ b/zendesk-helpcenter-export.rb @@ -65,7 +65,7 @@ def initialize(options) @log_level = options[:log_level] @output_type = options[:output_type] # used to make one big dumpfile of all metadata related to your helpcenter - @raw_data = {categories: [], sections: [], articles: [], article_attachments: []} + @raw_data = {categories: []} # configure Httparty base uri self.class.base_uri "https://#{options[:subdomain]}.zendesk.com" end @@ -78,29 +78,40 @@ def to_html! categories['categories'].each do |category| log(category['name'].upcase) - @raw_data[:categories] << category + category_dir = dir_path(category) + category_file_path = "#{category_dir}index.html" + category[:sections] = [] sections(category['id'])['sections'].each do |section| - @raw_data[:sections] << section log(" #{section['name']}") + section_dir = dir_path(category, section) + section_file_path = "#{section_dir}index.html" + section[:articles] = [] articles(section['id'])['articles'].each do |article| log(" #{article['name']}", :standard) - article_dir = dir_path(category, section, article) - file_path = "#{article_dir}index.html" - article['backup_path'] = file_path - @raw_data[:articles] << article - - File.open(file_path, "w+") { |f| f.puts article_html_content(article) } + article_file_path = "#{article_dir}index.html" - article_attachments(article['id'])['article_attachments'].each do |article_attachment| - @raw_data[:article_attachments] << article_attachment + article[:attachments] = [] + article_attachments(article['id'])['article_attachments'].each do |attachment| + article[:attachments] << attachment # optimization, do not download attachment when already present (we could check based on the id) - download_attachment!(article_attachment, article_dir) + download_attachment!(attachment, article_dir) end + + article['backup_path'] = article_file_path + section[:articles] << article + File.open(article_file_path, "w+") { |f| f.puts article_html_content(article) } + end + + section['backup_path'] = section_file_path + category[:sections] << section end + + category['backup_path'] = category_file_path + @raw_data[:categories] << category end end @@ -110,10 +121,9 @@ def to_json! end def create_table_of_contents! - File.open("./index.html", "w+") { |f| f.puts main_overview_file } - - # ここで各階層の index.html を生成すれば良い。 - + all_overview_files.each do |path, html| + File.open("#{path}", "w+") { |f| f.puts html } + end end # Section: Article content @@ -124,9 +134,10 @@ def article_html_content(article) # and replace all image links towards the local url regex_find = /https:\/\/.+?zendesk.com.+?article_attachments\/(\d+?)\/(.+)\.(.+?)" alt/ regex_replace = output_type == :slugified ? '\1-\2.\3" alt' : '\1.\3" alt' - - # ここでコンテンツを加工できる。 - + + # TODO: Enable internal links + # Replace "https://xxxx.zendesk.com/hc/..." → /
/
/... + boiler_plate_html do """

#{article['name']}

@@ -136,26 +147,78 @@ def article_html_content(article) end def main_overview_file + root_overview_file(true) + end + + def root_overview_file(recursive = false) boiler_plate_html do content = [] - + content << "
    " raw_data[:categories].each do |cat| - content << "

    #{cat['name']}

    " - raw_data[:sections].each do |section| - next if section["category_id"] != cat['id'] - content << "#{section["name"]}
    " - content << "
      " - raw_data[:articles].each do |article| - next if article["section_id"] != section['id'] - content << "
    • #{article['name']}
    • " - end - content << "
    " + content << "
  • " + content << "

    #{cat['name']}

    " + if recursive == true + content << category_overview_file(cat, recursive) end + content << "
  • " end + content << "
" content.join("\n") end end + def category_overview_file(category, recursive = false) + boiler_plate_html do + content = [] + content << "

#{category['name']}

" if !recursive + content << "
    " + category[:sections].each do |section| + content << "
  • " + if recursive == true + content << "

    #{section['name']}

    " + content << section_overview_file(section, recursive) + else + content << "

    #{section['name']}

    " + end + content << "
  • " + end + content << "
" + content.join("\n") + end + end + + def section_overview_file(section, recursive = false) + boiler_plate_html do + content = [] + content << "

#{section['name']}

" if !recursive + content << "
    " + section[:articles].each do |article| + content << "
  • " + if recursive == true + content << "

    #{article['name']}

    " + else + content << "

    #{article['name']}

    " + end + content << "
  • " + end + content << "
" + content.join("\n") + end + end + + def all_overview_files + files = {'./index.html': main_overview_file} + + raw_data[:categories].each do |cat| + files[cat['backup_path']] = category_overview_file(cat) + cat[:sections].each do |section| + files[section['backup_path']] = section_overview_file(section) + end + end + + files + end + def boiler_plate_html &block """ From 78ac3aebdd92e80f0e0608ad29fbac5c9d03e359 Mon Sep 17 00:00:00 2001 From: tonextone Date: Thu, 2 Jan 2020 02:33:10 +0900 Subject: [PATCH 3/5] Localize URLs in all articles --- zendesk-helpcenter-export.rb | 169 ++++++++++++++++++++++++----------- 1 file changed, 118 insertions(+), 51 deletions(-) diff --git a/zendesk-helpcenter-export.rb b/zendesk-helpcenter-export.rb index 548e880..17b5ca7 100755 --- a/zendesk-helpcenter-export.rb +++ b/zendesk-helpcenter-export.rb @@ -74,45 +74,75 @@ def initialize(options) # --------------------------------------- def to_html! - return if api_error?(categories) + log("\n Fetching Zendesk Guide contents ... \n\n", :standard) + + _c = categories + return if !_c || api_error?(_c) + + _c['categories'].each_with_index do |category, category_index| + category['name'] = "#{category_index+1}. #{category['name']}" + log(" - [#{category['id']}] #{category['name']}") - categories['categories'].each do |category| - log(category['name'].upcase) category_dir = dir_path(category) category_file_path = "#{category_dir}index.html" - + category['backup_path'] = category_file_path category[:sections] = [] - sections(category['id'])['sections'].each do |section| - log(" #{section['name']}") + + _s = sections(category['id']) + next if !_s || api_error?(_s) + + _s['sections'].each_with_index do |section, section_index| + section['name'] = "#{category_index+1}-#{section_index+1}. #{section['name']}" + log(" - - [#{section['id']}] #{section['name']}") + section_dir = dir_path(category, section) section_file_path = "#{section_dir}index.html" - + section['backup_path'] = section_file_path section[:articles] = [] - articles(section['id'])['articles'].each do |article| - log(" #{article['name']}", :standard) + + _a = articles(section['id']) + next if !_a || api_error?(_a) + + _a['articles'].each_with_index do |article, article_index| + article['name'] = "#{category_index+1}-#{section_index+1}-#{article_index+1}. #{article['name']}" + log(" - - - [#{article['id']}] #{article['name']}", :standard) + article_dir = dir_path(category, section, article) article_file_path = "#{article_dir}index.html" - + article['backup_path'] = article_file_path article[:attachments] = [] - article_attachments(article['id'])['article_attachments'].each do |attachment| + + _aa = article_attachments(article['id']) + next if !_aa || api_error?(_aa) + + _aa['article_attachments'].each do |attachment| article[:attachments] << attachment # optimization, do not download attachment when already present (we could check based on the id) download_attachment!(attachment, article_dir) end - article['backup_path'] = article_file_path section[:articles] << article - File.open(article_file_path, "w+") { |f| f.puts article_html_content(article) } - end - section['backup_path'] = section_file_path category[:sections] << section end - category['backup_path'] = category_file_path @raw_data[:categories] << category end + log("\n Done. \n\n", :standard) + + log("\n Localizing URLs in all articles ... \n\n", :standard) + @raw_data[:categories].each do |c| + c[:sections].each do |s| + s[:articles].each do |a| + print '.' + a['body'] = convert_body(a['body']) + File.open(a['backup_path'], "w+") { |f| f.puts article_html_content(a) } + end + end + end + log("\n Done. \n\n", :standard) + end # can only be called AFTER export_html_and_images! @@ -129,37 +159,69 @@ def create_table_of_contents! # Section: Article content # --------------------------------------- - def article_html_content(article) - # add some boilerplat to make it all look nicer - # and replace all image links towards the local url - regex_find = /https:\/\/.+?zendesk.com.+?article_attachments\/(\d+?)\/(.+)\.(.+?)" alt/ - regex_replace = output_type == :slugified ? '\1-\2.\3" alt' : '\1.\3" alt' + def convert_body(body) + return body if body.class != String + + # replace all image links towards the local url + body.gsub!(/https:\/\/[^\.]+\.zendesk\.com\/hc\/article_attachments\/([0-9]+)\/([^\/]+)\.([^\/\."]+)/i) { + (output_type == :slugified) ? "#{$1}-#{$2}.#{$3}" : "#{$1}.#{$3}" + } + body.gsub!(/https:\/\/[^\.]+\.zendesk\.com\/hc\/[^\/]+\/categories\/([0-9]+)(-[^"]+)?/i) { + found = nil + found = raw_data[:categories].find { |c| c['id'].to_s == $1 } + found ? "../../../#{found['backup_path']}" : $& + } + body.gsub!(/https:\/\/[^\.]+\.zendesk\.com\/hc\/[^\/]+\/sections\/([0-9]+)(-[^"]+)?/i) { + found = nil + raw_data[:categories].each do |c| + found = c[:sections].find { |s| s['id'].to_s == $1 } + break if found + end + found ? "../../../#{found['backup_path']}" : $& + } + body.gsub!(/https:\/\/[^\.]+\.zendesk\.com\/hc\/[^\/]+\/articles\/([0-9]+)(-[^"]+)?/i) { + found = nil + raw_data[:categories].each do |c| + c[:sections].each do |s| + found = s[:articles].find { |a| a['id'].to_s == $1 } + break if found + end + break if found + end + found ? "../../../#{found['backup_path']}" : $& + } + body.gsub!(/<(\/?)h([1-5])/i) { "<#{$1}h#{$2.to_i + 1}" } if body.match(/

/
/
/... + body + end + def article_html_content(article) + # add some boilerplat to make it all look nicer boiler_plate_html do """ + [↑]

#{article['name']}

- #{article['body'].to_s.gsub(regex_find, regex_replace)} + #{article['body']} """ end end def main_overview_file - root_overview_file(true) + root_overview_file(recursive: true) end - def root_overview_file(recursive = false) + def root_overview_file(recursive: false, base_path: './') boiler_plate_html do content = [] + if base_path == './' + content << "

Table of Contents

" + end content << "
    " - raw_data[:categories].each do |cat| + @raw_data[:categories].each do |category| + path = "#{base_path}#{category['id']}/" content << "
  • " - content << "

    #{cat['name']}

    " - if recursive == true - content << category_overview_file(cat, recursive) - end + content << "#{category['name']}" + content << category_overview_file(category, recursive: recursive, base_path: path) if recursive content << "
  • " end content << "
" @@ -167,19 +229,19 @@ def root_overview_file(recursive = false) end end - def category_overview_file(category, recursive = false) + def category_overview_file(category, recursive: false, base_path: './') boiler_plate_html do content = [] - content << "

#{category['name']}

" if !recursive + if base_path == './' + content << "[↑]" + content << "

#{category['name']}

" + end content << "
    " category[:sections].each do |section| + path = "#{base_path}#{section['id']}/" content << "
  • " - if recursive == true - content << "

    #{section['name']}

    " - content << section_overview_file(section, recursive) - else - content << "

    #{section['name']}

    " - end + content << "#{section['name']}" + content << section_overview_file(section, recursive: recursive, base_path: path) if recursive content << "
  • " end content << "
" @@ -187,18 +249,18 @@ def category_overview_file(category, recursive = false) end end - def section_overview_file(section, recursive = false) + def section_overview_file(section, recursive: false, base_path: './') boiler_plate_html do content = [] - content << "

#{section['name']}

" if !recursive + if base_path == './' + content << "[↑]" + content << "

#{section['name']}

" + end content << "" @@ -209,9 +271,9 @@ def section_overview_file(section, recursive = false) def all_overview_files files = {'./index.html': main_overview_file} - raw_data[:categories].each do |cat| - files[cat['backup_path']] = category_overview_file(cat) - cat[:sections].each do |section| + @raw_data[:categories].each do |category| + files[category['backup_path']] = category_overview_file(category, recursive: true) + category[:sections].each do |section| files[section['backup_path']] = section_overview_file(section) end end @@ -321,8 +383,13 @@ def slugify(text) # section: API calls # --------------------------------------- def api(url) - options = {:basic_auth => @auth} - self.class.get("/api/v2/help_center/#{url}", options) + begin + options = {:basic_auth => @auth} + self.class.get("/api/v2/help_center/#{url}", options) + rescue => e + p e + nil + end end def api_error?(api_response) From 0290eb35f9215825c59fabd66248ea00cd406a3e Mon Sep 17 00:00:00 2001 From: tonextone Date: Thu, 2 Jan 2020 17:45:53 +0900 Subject: [PATCH 4/5] Attached images may be seen in multiple articles. So, should be globally referable. --- zendesk-helpcenter-export.rb | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/zendesk-helpcenter-export.rb b/zendesk-helpcenter-export.rb index 17b5ca7..1e611db 100755 --- a/zendesk-helpcenter-export.rb +++ b/zendesk-helpcenter-export.rb @@ -17,11 +17,14 @@ # all of this in a nested folder structure # # - category +# - index.html # - section +# - index.html # - article -# - article.html -# - image-1.jpg -# - image-2.png +# - index.html +# - attachments +# - image-1.jpg +# - image-2.png # - meta_data.json # # Bonus: it is smart in that when you rename a category, section, article it won't @@ -56,6 +59,7 @@ class ExportHelpCenter LOG_LEVELS = {standard: 1, verbose: 2} OUTPUT_TYPES = [:slugified, :id_only] REQUIRED_INPUTS = [:email, :password, :subdomain] + ATTACHMENTS_DIR = './attachments/' def initialize(options) @@ -74,7 +78,7 @@ def initialize(options) # --------------------------------------- def to_html! - log("\n Fetching Zendesk Guide contents ... \n\n", :standard) + log("\n Fetching all contents ... \n\n", :standard) _c = categories return if !_c || api_error?(_c) @@ -118,7 +122,7 @@ def to_html! _aa['article_attachments'].each do |attachment| article[:attachments] << attachment # optimization, do not download attachment when already present (we could check based on the id) - download_attachment!(attachment, article_dir) + download_attachment!(attachment, ATTACHMENTS_DIR) end section[:articles] << article @@ -131,11 +135,11 @@ def to_html! end log("\n Done. \n\n", :standard) - log("\n Localizing URLs in all articles ... \n\n", :standard) + log("\n Localizing all URLs in articles ... \n\n", :standard) @raw_data[:categories].each do |c| c[:sections].each do |s| s[:articles].each do |a| - print '.' + log(" - - - [#{a['id']}] #{a['name']}", :standard) a['body'] = convert_body(a['body']) File.open(a['backup_path'], "w+") { |f| f.puts article_html_content(a) } end @@ -163,8 +167,13 @@ def convert_body(body) return body if body.class != String # replace all image links towards the local url - body.gsub!(/https:\/\/[^\.]+\.zendesk\.com\/hc\/article_attachments\/([0-9]+)\/([^\/]+)\.([^\/\."]+)/i) { - (output_type == :slugified) ? "#{$1}-#{$2}.#{$3}" : "#{$1}.#{$3}" + body.gsub!(/['"](https:\/\/[^\.]+\.zendesk\.com\/hc\/article_attachments\/([0-9]+)\/([^\/"]+)\.(png|jpe?g|gif|svg))['"]/i) { + attachment = {} + attachment['content_url'] = $1 + attachment['id'] = $2 + attachment['file_name'] = "#{$3}.#{$4}" + download_attachment!(attachment, ATTACHMENTS_DIR) + (output_type == :slugified) ? "\"../../../#{ATTACHMENTS_DIR}#{$2}-#{$3}.#{$4}\"" : "\"../../../#{ATTACHMENTS_DIR}#{$2}.#{$4}\"" } body.gsub!(/https:\/\/[^\.]+\.zendesk\.com\/hc\/[^\/]+\/categories\/([0-9]+)(-[^"]+)?/i) { found = nil @@ -420,13 +429,16 @@ def article_attachments(article_id) api("articles/#{article_id}/attachments.jso def download_attachment!(article_attachment, store_in_dir) - file_name = "#{article_attachment['id']}#{output_type == :slugified ? "-#{article_attachment['file_name']}" : "#{File.extname(article_attachment['file_name'])}"}" + Dir.mkdir(store_in_dir) unless File.exists?(store_in_dir) + + suffix = output_type == :slugified ? "-#{article_attachment['file_name']}" : "#{File.extname(article_attachment['file_name'])}" + file_name = "#{article_attachment['id']}#{suffix}" # rename file if it existed with same id but incorrect name rename_dir_or_file_starting_with_id!(store_in_dir, article_attachment['id'], file_name) # if file with same id already present, do not "redownload" return true if Dir.entries(store_in_dir).select{|e| e.start_with?(article_attachment['id'].to_s)}.length > 0 - log(" #{article_attachment['file_name']}") + log(" - - - - #{article_attachment['file_name']}") begin options = {:basic_auth => @auth} From 523b69842229dd8e6a74e39a180352110eb51f84 Mon Sep 17 00:00:00 2001 From: tonextone Date: Thu, 2 Jan 2020 18:29:18 +0900 Subject: [PATCH 5/5] update README. --- README.md | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index e733cf1..febdf3a 100755 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Ruby script to export your Zendesk helpcenter (v 0.2) +# Ruby script to export your Zendesk helpcenter Script based on https://github.com/skipjac/pull-zendesk-forums (which exports the forum, not the help center article) @@ -6,15 +6,17 @@ Script based on https://github.com/skipjac/pull-zendesk-forums it uses the Zendesk API to export all categories, sections, articles, article_attachments to html (and json) all of this in a nested folder structure - - category - - section - - article - - article.html - - image-1.jpg - - image-2.png - meta_data.json + - / + - index.html + -
/ + - index.html + -
/ + - index.html + - attachments/ + - image-1.jpg + - image-2.png + - meta_data.json -![Zendesk demo](https://github.com/pjmuller/zendesk-helpcenter-export/raw/master/demo-screenshot.png) Bonus: it is smart in that when you rename a category, section, article it won't start to create duplicate folders but renames the old ones. The script can thus be used for both a new dump as updating an existing one.