Skip to content

Commit

Permalink
Merge pull request #195 from sharifhsn/cp437
Browse files Browse the repository at this point in the history
Implement character table control and codepage 437 option
  • Loading branch information
sharkdp authored Dec 11, 2023
2 parents 663e4c1 + 1310b1d commit 2bab0c7
Show file tree
Hide file tree
Showing 4 changed files with 170 additions and 24 deletions.
48 changes: 48 additions & 0 deletions src/colors.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
use owo_colors::{colors, Color};

// Each constant below holds the bytes of an `owo_colors` `ANSI_FG` code, i.e. the
// sequence that selects the named foreground color, ready to be passed to `write_all`.

/// Bright black foreground; `Byte::color` returns it for `ByteCategory::Null`.
pub const COLOR_NULL: &[u8] = colors::BrightBlack::ANSI_FG.as_bytes();
/// Bright black foreground; by its name, meant for the offset column (usage not visible here).
pub const COLOR_OFFSET: &[u8] = colors::BrightBlack::ANSI_FG.as_bytes();
/// Cyan foreground; `Byte::color` returns it for `ByteCategory::AsciiPrintable`.
pub const COLOR_ASCII_PRINTABLE: &[u8] = colors::Cyan::ANSI_FG.as_bytes();
/// Green foreground; by its name, meant for ASCII whitespace bytes.
pub const COLOR_ASCII_WHITESPACE: &[u8] = colors::Green::ANSI_FG.as_bytes();
/// Green foreground (same code as `COLOR_ASCII_WHITESPACE`); by its name, meant for other ASCII bytes.
pub const COLOR_ASCII_OTHER: &[u8] = colors::Green::ANSI_FG.as_bytes();
/// Yellow foreground; by its name, meant for non-ASCII bytes.
pub const COLOR_NONASCII: &[u8] = colors::Yellow::ANSI_FG.as_bytes();
/// Default foreground; the printer writes it after colored output when colors are enabled.
pub const COLOR_RESET: &[u8] = colors::Default::ANSI_FG.as_bytes();

/// Display glyph for every byte value under the code page 437 character table.
///
/// Index the table with the byte value (`CP437[byte as usize]`); it has exactly
/// 256 entries, so every `u8` is a valid index. Control bytes (01h–1Fh, 7Fh) map
/// to their graphic glyphs, 00h maps to `⋄`, and FFh maps to the single-character
/// ligature `ﬀ` (U+FB00).
#[rustfmt::skip]
pub const CP437: [char; 256] = [
    // Copyright (c) 2016, Delan Azabani <[email protected]>
    //
    // Permission to use, copy, modify, and/or distribute this software for any
    // purpose with or without fee is hereby granted, provided that the above
    // copyright notice and this permission notice appear in all copies.
    //
    // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
    // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
    // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
    // ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
    // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
    // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
    // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
    //
    // modified to use the ⋄ character instead of ␀

    // use https://en.wikipedia.org/w/index.php?title=Code_page_437&oldid=978947122
    // not ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP437.TXT
    // because we want the graphic versions of 01h–1Fh + 7Fh
    '⋄','☺','☻','♥','♦','♣','♠','•','◘','○','◙','♂','♀','♪','♫','☼',
    '►','◄','↕','‼','¶','§','▬','↨','↑','↓','→','←','∟','↔','▲','▼',
    ' ','!','"','#','$','%','&','\'','(',')','*','+',',','-','.','/',
    '0','1','2','3','4','5','6','7','8','9',':',';','<','=','>','?',
    '@','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O',
    'P','Q','R','S','T','U','V','W','X','Y','Z','[','\\',']','^','_',
    '`','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o',
    'p','q','r','s','t','u','v','w','x','y','z','{','|','}','~','⌂',
    'Ç','ü','é','â','ä','à','å','ç','ê','ë','è','ï','î','ì','Ä','Å',
    'É','æ','Æ','ô','ö','ò','û','ù','ÿ','Ö','Ü','¢','£','¥','₧','ƒ',
    'á','í','ó','ú','ñ','Ñ','ª','º','¿','⌐','¬','½','¼','¡','«','»',
    '░','▒','▓','│','┤','╡','╢','╖','╕','╣','║','╗','╝','╜','╛','┐',
    '└','┴','┬','├','─','┼','╞','╟','╚','╔','╩','╦','╠','═','╬','╧',
    '╨','╤','╥','╙','╘','╒','╓','╫','╪','┘','┌','█','▄','▌','▐','▀',
    'α','ß','Γ','π','Σ','σ','µ','τ','Φ','Θ','Ω','δ','∞','φ','ε','∩',
    // The final entry (FFh) must stay the one-scalar ligature U+FB00: spelling it
    // as the two ASCII letters "ff" is not a valid `char` literal.
    '≡','±','≥','≤','⌠','⌡','÷','≈','°','∙','·','√','ⁿ','²','■','ﬀ',
];
61 changes: 38 additions & 23 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,26 +1,18 @@
pub(crate) mod colors;
pub(crate) mod input;

pub use colors::*;
pub use input::*;

use std::io::{self, BufReader, Read, Write};

use owo_colors::{colors, Color};

/// Numeric base that can be selected for the printer.
///
/// `PrinterBuilder` starts out with `Base::Hexadecimal`, and `Printer::new`
/// derives its per-byte digit count from the chosen variant.
pub enum Base {
/// Base 2.
Binary,
/// Base 8.
Octal,
/// Base 10.
Decimal,
/// Base 16; the builder's default.
Hexadecimal,
}

const COLOR_NULL: &[u8] = colors::BrightBlack::ANSI_FG.as_bytes();
const COLOR_OFFSET: &[u8] = colors::BrightBlack::ANSI_FG.as_bytes();
const COLOR_ASCII_PRINTABLE: &[u8] = colors::Cyan::ANSI_FG.as_bytes();
const COLOR_ASCII_WHITESPACE: &[u8] = colors::Green::ANSI_FG.as_bytes();
const COLOR_ASCII_OTHER: &[u8] = colors::Green::ANSI_FG.as_bytes();
const COLOR_NONASCII: &[u8] = colors::Yellow::ANSI_FG.as_bytes();
const COLOR_RESET: &[u8] = colors::Default::ANSI_FG.as_bytes();

#[derive(Copy, Clone)]
pub enum ByteCategory {
Null,
Expand All @@ -30,6 +22,13 @@ pub enum ByteCategory {
NonAscii,
}

/// Selects how `Byte::as_char` turns a byte into the glyph shown in the character panel.
///
/// Marked `#[non_exhaustive]`, so code outside this crate must include a wildcard
/// arm when matching on it.
#[derive(Copy, Clone)]
#[non_exhaustive]
pub enum CharacterTable {
/// Printable ASCII bytes are shown as themselves; every other byte is shown as a
/// placeholder glyph chosen by its `ByteCategory`.
AsciiOnly,
/// Every byte is shown as the glyph at its index in the `CP437` table.
CP437,
}

#[derive(Copy, Clone)]
pub enum Endianness {
Little,
Expand Down Expand Up @@ -64,7 +63,6 @@ impl Byte {

fn color(self) -> &'static [u8] {
use crate::ByteCategory::*;

match self.category() {
Null => COLOR_NULL,
AsciiPrintable => COLOR_ASCII_PRINTABLE,
Expand All @@ -74,16 +72,18 @@ impl Byte {
}
}

fn as_char(self) -> char {
fn as_char(self, character_table: CharacterTable) -> char {
use crate::ByteCategory::*;

match self.category() {
Null => '⋄',
AsciiPrintable => self.0 as char,
AsciiWhitespace if self.0 == 0x20 => ' ',
AsciiWhitespace => '_',
AsciiOther => '•',
NonAscii => '×',
match character_table {
CharacterTable::AsciiOnly => match self.category() {
Null => '⋄',
AsciiPrintable => self.0 as char,
AsciiWhitespace if self.0 == 0x20 => ' ',
AsciiWhitespace => '_',
AsciiOther => '•',
NonAscii => '×',
},
CharacterTable::CP437 => CP437[self.0 as usize],
}
}
}
Expand Down Expand Up @@ -167,6 +167,7 @@ pub struct PrinterBuilder<'a, Writer: Write> {
group_size: u8,
base: Base,
endianness: Endianness,
character_table: CharacterTable,
}

impl<'a, Writer: Write> PrinterBuilder<'a, Writer> {
Expand All @@ -182,6 +183,7 @@ impl<'a, Writer: Write> PrinterBuilder<'a, Writer> {
group_size: 1,
base: Base::Hexadecimal,
endianness: Endianness::Big,
character_table: CharacterTable::AsciiOnly,
}
}

Expand Down Expand Up @@ -230,6 +232,11 @@ impl<'a, Writer: Write> PrinterBuilder<'a, Writer> {
self
}

pub fn character_table(mut self, character_table: CharacterTable) -> Self {
self.character_table = character_table;
self
}

pub fn build(self) -> Printer<'a, Writer> {
Printer::new(
self.writer,
Expand All @@ -242,6 +249,7 @@ impl<'a, Writer: Write> PrinterBuilder<'a, Writer> {
self.group_size,
self.base,
self.endianness,
self.character_table,
)
}
}
Expand Down Expand Up @@ -271,6 +279,8 @@ pub struct Printer<'a, Writer: Write> {
base_digits: u8,
/// Whether to show groups in little or big endian format.
endianness: Endianness,
/// The character table to reference for the character panel.
character_table: CharacterTable,
}

impl<'a, Writer: Write> Printer<'a, Writer> {
Expand All @@ -285,6 +295,7 @@ impl<'a, Writer: Write> Printer<'a, Writer> {
group_size: u8,
base: Base,
endianness: Endianness,
character_table: CharacterTable,
) -> Printer<'a, Writer> {
Printer {
idx: 0,
Expand All @@ -304,7 +315,7 @@ impl<'a, Writer: Write> Printer<'a, Writer> {
})
.collect(),
byte_char_panel: (0u8..=u8::MAX)
.map(|i| format!("{}", Byte(i).as_char()))
.map(|i| format!("{}", Byte(i).as_char(character_table)))
.collect(),
byte_hex_panel_g: (0u8..=u8::MAX).map(|i| format!("{i:02x}")).collect(),
squeezer: if use_squeeze {
Expand All @@ -323,6 +334,7 @@ impl<'a, Writer: Write> Printer<'a, Writer> {
Base::Hexadecimal => 2,
},
endianness,
character_table,
}
}

Expand Down Expand Up @@ -401,8 +413,7 @@ impl<'a, Writer: Write> Printer<'a, Writer> {
if self.show_position_panel {
match self.squeezer {
Squeezer::Print => {
self.writer
.write_all(self.byte_char_panel[b'*' as usize].as_bytes())?;
self.writer.write_all(&[b'*'])?;
if self.show_color {
self.writer.write_all(COLOR_RESET)?;
}
Expand Down Expand Up @@ -732,6 +743,7 @@ mod tests {
1,
Base::Hexadecimal,
Endianness::Big,
CharacterTable::AsciiOnly,
);

printer.print_all(input).unwrap();
Expand Down Expand Up @@ -787,6 +799,7 @@ mod tests {
1,
Base::Hexadecimal,
Endianness::Big,
CharacterTable::AsciiOnly,
);
printer.display_offset(0xdeadbeef);

Expand Down Expand Up @@ -821,6 +834,7 @@ mod tests {
1,
Base::Hexadecimal,
Endianness::Big,
CharacterTable::AsciiOnly,
);

printer.print_all(input).unwrap();
Expand Down Expand Up @@ -881,6 +895,7 @@ mod tests {
1,
Base::Hexadecimal,
Endianness::Big,
CharacterTable::AsciiOnly,
);

printer.print_all(input).unwrap();
Expand Down
26 changes: 25 additions & 1 deletion src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ use thiserror::Error as ThisError;

use terminal_size::terminal_size;

use hexyl::{Base, BorderStyle, Endianness, Input, PrinterBuilder};
use hexyl::{Base, BorderStyle, CharacterTable, Endianness, Input, PrinterBuilder};

#[cfg(test)]
mod tests;
Expand Down Expand Up @@ -212,6 +212,18 @@ fn run() -> Result<()> {
.hide(true)
.help("An alias for '--endianness=little'."),
)
.arg(
Arg::new("character-table")
.long("character-table")
.value_name("FORMAT")
.value_parser(["codepage-437", "ascii-only"])
.default_value("ascii-only")
.help(
"The character table that should be used. 'ascii-only' \
will show dots for non-ASCII characters, 'codepage-437' \
will use Code page 437 for those characters."
),
)
.arg(
Arg::new("base")
.short('b')
Expand Down Expand Up @@ -469,6 +481,17 @@ fn run() -> Result<()> {
("big", _) => Endianness::Big,
_ => unreachable!(),
};

let character_table = match matches
.get_one::<String>("character-table")
.unwrap()
.as_ref()
{
"ascii-only" => CharacterTable::AsciiOnly,
"codepage-437" => CharacterTable::CP437,
_ => unreachable!(),
};

let stdout = io::stdout();
let mut stdout_lock = BufWriter::new(stdout.lock());

Expand All @@ -482,6 +505,7 @@ fn run() -> Result<()> {
.group_size(group_size)
.with_base(base)
.endianness(endianness)
.character_table(character_table)
.build();
printer.display_offset(skip_offset + display_offset);
printer.print_all(&mut reader).map_err(|e| anyhow!(e))?;
Expand Down
59 changes: 59 additions & 0 deletions tests/integration_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -618,3 +618,62 @@ mod base {
);
}
}

/// Integration tests for the `--character-table` command-line option.
mod character_table {
    use super::hexyl;
    use super::PrettyAssert;

    /// Dumps the `hello_world_elf64` fixture with `--character-table=codepage-437`
    /// and colors disabled, then checks the rendered table.
    #[test]
    fn codepage_437() {
        // The expected table is column-aligned: position and character cells are
        // 8 columns wide and each hex panel is 25 columns wide, matching the border
        // rows. Squeezed rows (`*`) and the final partial row are padded with
        // spaces to those widths (assuming `pretty_stdout` compares verbatim).
        hexyl()
            .arg("hello_world_elf64")
            .arg("--color=never")
            .arg("--character-table=codepage-437")
            .assert()
            .success()
            .pretty_stdout(
                "┌────────┬─────────────────────────┬─────────────────────────┬────────┬────────┐
│00000000│ 7f 45 4c 46 02 01 01 00 ┊ 00 00 00 00 00 00 00 00 │⌂ELF☻☺☺⋄┊⋄⋄⋄⋄⋄⋄⋄⋄│
│00000010│ 02 00 3e 00 01 00 00 00 ┊ 00 10 40 00 00 00 00 00 │☻⋄>⋄☺⋄⋄⋄┊⋄►@⋄⋄⋄⋄⋄│
│00000020│ 40 00 00 00 00 00 00 00 ┊ 28 20 00 00 00 00 00 00 │@⋄⋄⋄⋄⋄⋄⋄┊( ⋄⋄⋄⋄⋄⋄│
│00000030│ 00 00 00 00 40 00 38 00 ┊ 03 00 40 00 04 00 03 00 │⋄⋄⋄⋄@⋄8⋄┊♥⋄@⋄♦⋄♥⋄│
│00000040│ 01 00 00 00 04 00 00 00 ┊ 00 00 00 00 00 00 00 00 │☺⋄⋄⋄♦⋄⋄⋄┊⋄⋄⋄⋄⋄⋄⋄⋄│
│00000050│ 00 00 40 00 00 00 00 00 ┊ 00 00 40 00 00 00 00 00 │⋄⋄@⋄⋄⋄⋄⋄┊⋄⋄@⋄⋄⋄⋄⋄│
│00000060│ e8 00 00 00 00 00 00 00 ┊ e8 00 00 00 00 00 00 00 │Φ⋄⋄⋄⋄⋄⋄⋄┊Φ⋄⋄⋄⋄⋄⋄⋄│
│00000070│ 00 10 00 00 00 00 00 00 ┊ 01 00 00 00 05 00 00 00 │⋄►⋄⋄⋄⋄⋄⋄┊☺⋄⋄⋄♣⋄⋄⋄│
│00000080│ 00 10 00 00 00 00 00 00 ┊ 00 10 40 00 00 00 00 00 │⋄►⋄⋄⋄⋄⋄⋄┊⋄►@⋄⋄⋄⋄⋄│
│00000090│ 00 10 40 00 00 00 00 00 ┊ 1d 00 00 00 00 00 00 00 │⋄►@⋄⋄⋄⋄⋄┊↔⋄⋄⋄⋄⋄⋄⋄│
│000000a0│ 1d 00 00 00 00 00 00 00 ┊ 00 10 00 00 00 00 00 00 │↔⋄⋄⋄⋄⋄⋄⋄┊⋄►⋄⋄⋄⋄⋄⋄│
│000000b0│ 01 00 00 00 06 00 00 00 ┊ 00 20 00 00 00 00 00 00 │☺⋄⋄⋄♠⋄⋄⋄┊⋄ ⋄⋄⋄⋄⋄⋄│
│000000c0│ 00 20 40 00 00 00 00 00 ┊ 00 20 40 00 00 00 00 00 │⋄ @⋄⋄⋄⋄⋄┊⋄ @⋄⋄⋄⋄⋄│
│000000d0│ 0e 00 00 00 00 00 00 00 ┊ 0e 00 00 00 00 00 00 00 │♫⋄⋄⋄⋄⋄⋄⋄┊♫⋄⋄⋄⋄⋄⋄⋄│
│000000e0│ 00 10 00 00 00 00 00 00 ┊ 00 00 00 00 00 00 00 00 │⋄►⋄⋄⋄⋄⋄⋄┊⋄⋄⋄⋄⋄⋄⋄⋄│
│000000f0│ 00 00 00 00 00 00 00 00 ┊ 00 00 00 00 00 00 00 00 │⋄⋄⋄⋄⋄⋄⋄⋄┊⋄⋄⋄⋄⋄⋄⋄⋄│
│*       │                         ┊                         │        ┊        │
│00001000│ ba 0e 00 00 00 b9 00 20 ┊ 40 00 bb 01 00 00 00 b8 │║♫⋄⋄⋄╣⋄ ┊@⋄╗☺⋄⋄⋄╕│
│00001010│ 04 00 00 00 cd 80 b8 01 ┊ 00 00 00 cd 80 00 00 00 │♦⋄⋄⋄═Ç╕☺┊⋄⋄⋄═Ç⋄⋄⋄│
│00001020│ 00 00 00 00 00 00 00 00 ┊ 00 00 00 00 00 00 00 00 │⋄⋄⋄⋄⋄⋄⋄⋄┊⋄⋄⋄⋄⋄⋄⋄⋄│
│*       │                         ┊                         │        ┊        │
│00002000│ 48 65 6c 6c 6f 2c 20 77 ┊ 6f 72 6c 64 21 0a 00 2e │Hello, w┊orld!◙⋄.│
│00002010│ 73 68 73 74 72 74 61 62 ┊ 00 2e 74 65 78 74 00 2e │shstrtab┊⋄.text⋄.│
│00002020│ 64 61 74 61 00 00 00 00 ┊ 00 00 00 00 00 00 00 00 │data⋄⋄⋄⋄┊⋄⋄⋄⋄⋄⋄⋄⋄│
│00002030│ 00 00 00 00 00 00 00 00 ┊ 00 00 00 00 00 00 00 00 │⋄⋄⋄⋄⋄⋄⋄⋄┊⋄⋄⋄⋄⋄⋄⋄⋄│
│*       │                         ┊                         │        ┊        │
│00002060│ 00 00 00 00 00 00 00 00 ┊ 0b 00 00 00 01 00 00 00 │⋄⋄⋄⋄⋄⋄⋄⋄┊♂⋄⋄⋄☺⋄⋄⋄│
│00002070│ 06 00 00 00 00 00 00 00 ┊ 00 10 40 00 00 00 00 00 │♠⋄⋄⋄⋄⋄⋄⋄┊⋄►@⋄⋄⋄⋄⋄│
│00002080│ 00 10 00 00 00 00 00 00 ┊ 1d 00 00 00 00 00 00 00 │⋄►⋄⋄⋄⋄⋄⋄┊↔⋄⋄⋄⋄⋄⋄⋄│
│00002090│ 00 00 00 00 00 00 00 00 ┊ 10 00 00 00 00 00 00 00 │⋄⋄⋄⋄⋄⋄⋄⋄┊►⋄⋄⋄⋄⋄⋄⋄│
│000020a0│ 00 00 00 00 00 00 00 00 ┊ 11 00 00 00 01 00 00 00 │⋄⋄⋄⋄⋄⋄⋄⋄┊◄⋄⋄⋄☺⋄⋄⋄│
│000020b0│ 03 00 00 00 00 00 00 00 ┊ 00 20 40 00 00 00 00 00 │♥⋄⋄⋄⋄⋄⋄⋄┊⋄ @⋄⋄⋄⋄⋄│
│000020c0│ 00 20 00 00 00 00 00 00 ┊ 0e 00 00 00 00 00 00 00 │⋄ ⋄⋄⋄⋄⋄⋄┊♫⋄⋄⋄⋄⋄⋄⋄│
│000020d0│ 00 00 00 00 00 00 00 00 ┊ 04 00 00 00 00 00 00 00 │⋄⋄⋄⋄⋄⋄⋄⋄┊♦⋄⋄⋄⋄⋄⋄⋄│
│000020e0│ 00 00 00 00 00 00 00 00 ┊ 01 00 00 00 03 00 00 00 │⋄⋄⋄⋄⋄⋄⋄⋄┊☺⋄⋄⋄♥⋄⋄⋄│
│000020f0│ 00 00 00 00 00 00 00 00 ┊ 00 00 00 00 00 00 00 00 │⋄⋄⋄⋄⋄⋄⋄⋄┊⋄⋄⋄⋄⋄⋄⋄⋄│
│00002100│ 0e 20 00 00 00 00 00 00 ┊ 17 00 00 00 00 00 00 00 │♫ ⋄⋄⋄⋄⋄⋄┊↨⋄⋄⋄⋄⋄⋄⋄│
│00002110│ 00 00 00 00 00 00 00 00 ┊ 01 00 00 00 00 00 00 00 │⋄⋄⋄⋄⋄⋄⋄⋄┊☺⋄⋄⋄⋄⋄⋄⋄│
│00002120│ 00 00 00 00 00 00 00 00 ┊                         │⋄⋄⋄⋄⋄⋄⋄⋄┊        │
└────────┴─────────────────────────┴─────────────────────────┴────────┴────────┘
",
            );
    }
}

0 comments on commit 2bab0c7

Please sign in to comment.