diff --git a/src/colors.rs b/src/colors.rs new file mode 100644 index 00000000..748e6fb8 --- /dev/null +++ b/src/colors.rs @@ -0,0 +1,48 @@ +use owo_colors::{colors, Color}; + +pub const COLOR_NULL: &[u8] = colors::BrightBlack::ANSI_FG.as_bytes(); +pub const COLOR_OFFSET: &[u8] = colors::BrightBlack::ANSI_FG.as_bytes(); +pub const COLOR_ASCII_PRINTABLE: &[u8] = colors::Cyan::ANSI_FG.as_bytes(); +pub const COLOR_ASCII_WHITESPACE: &[u8] = colors::Green::ANSI_FG.as_bytes(); +pub const COLOR_ASCII_OTHER: &[u8] = colors::Green::ANSI_FG.as_bytes(); +pub const COLOR_NONASCII: &[u8] = colors::Yellow::ANSI_FG.as_bytes(); +pub const COLOR_RESET: &[u8] = colors::Default::ANSI_FG.as_bytes(); + +#[rustfmt::skip] +pub const CP437: [char; 256] = [ + // Copyright (c) 2016, Delan Azabani + // + // Permission to use, copy, modify, and/or distribute this software for any + // purpose with or without fee is hereby granted, provided that the above + // copyright notice and this permission notice appear in all copies. + // + // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + // ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ // + // modified to use the ⋄ character instead of ␀ + + // use https://en.wikipedia.org/w/index.php?title=Code_page_437&oldid=978947122 + // not ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP437.TXT + // because we want the graphic versions of 01h–1Fh + 7Fh + '⋄','☺','☻','♥','♦','♣','♠','•','◘','○','◙','♂','♀','♪','♫','☼', + '►','◄','↕','‼','¶','§','▬','↨','↑','↓','→','←','∟','↔','▲','▼', + ' ','!','"','#','$','%','&','\'','(',')','*','+',',','-','.','/', + '0','1','2','3','4','5','6','7','8','9',':',';','<','=','>','?', + '@','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O', + 'P','Q','R','S','T','U','V','W','X','Y','Z','[','\\',']','^','_', + '`','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o', + 'p','q','r','s','t','u','v','w','x','y','z','{','|','}','~','⌂', + 'Ç','ü','é','â','ä','à','å','ç','ê','ë','è','ï','î','ì','Ä','Å', + 'É','æ','Æ','ô','ö','ò','û','ù','ÿ','Ö','Ü','¢','£','¥','₧','ƒ', + 'á','í','ó','ú','ñ','Ñ','ª','º','¿','⌐','¬','½','¼','¡','«','»', + '░','▒','▓','│','┤','╡','╢','╖','╕','╣','║','╗','╝','╜','╛','┐', + '└','┴','┬','├','─','┼','╞','╟','╚','╔','╩','╦','╠','═','╬','╧', + '╨','╤','╥','╙','╘','╒','╓','╫','╪','┘','┌','█','▄','▌','▐','▀', + 'α','ß','Γ','π','Σ','σ','µ','τ','Φ','Θ','Ω','δ','∞','φ','ε','∩', + '≡','±','≥','≤','⌠','⌡','÷','≈','°','∙','·','√','ⁿ','²','■','ﬀ', +]; diff --git a/src/lib.rs b/src/lib.rs index 1c0c62e2..145f46d0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,11 +1,11 @@ +pub(crate) mod colors; pub(crate) mod input; +pub use colors::*; pub use input::*; use std::io::{self, BufReader, Read, Write}; -use owo_colors::{colors, Color}; - pub enum Base { Binary, Octal, @@ -13,14 +13,6 @@ pub enum Base { Hexadecimal, } -const COLOR_NULL: &[u8] = colors::BrightBlack::ANSI_FG.as_bytes(); -const COLOR_OFFSET: &[u8] = colors::BrightBlack::ANSI_FG.as_bytes(); -const COLOR_ASCII_PRINTABLE: &[u8] = colors::Cyan::ANSI_FG.as_bytes(); -const COLOR_ASCII_WHITESPACE: &[u8] = 
colors::Green::ANSI_FG.as_bytes(); -const COLOR_ASCII_OTHER: &[u8] = colors::Green::ANSI_FG.as_bytes(); -const COLOR_NONASCII: &[u8] = colors::Yellow::ANSI_FG.as_bytes(); -const COLOR_RESET: &[u8] = colors::Default::ANSI_FG.as_bytes(); - #[derive(Copy, Clone)] pub enum ByteCategory { Null, @@ -30,6 +22,13 @@ pub enum ByteCategory { NonAscii, } +#[derive(Copy, Clone)] +#[non_exhaustive] +pub enum CharacterTable { + AsciiOnly, + CP437, +} + #[derive(Copy, Clone)] pub enum Endianness { Little, @@ -64,7 +63,6 @@ impl Byte { fn color(self) -> &'static [u8] { use crate::ByteCategory::*; - match self.category() { Null => COLOR_NULL, AsciiPrintable => COLOR_ASCII_PRINTABLE, @@ -74,16 +72,18 @@ impl Byte { } } - fn as_char(self) -> char { + fn as_char(self, character_table: CharacterTable) -> char { use crate::ByteCategory::*; - - match self.category() { - Null => '⋄', - AsciiPrintable => self.0 as char, - AsciiWhitespace if self.0 == 0x20 => ' ', - AsciiWhitespace => '_', - AsciiOther => '•', - NonAscii => '×', + match character_table { + CharacterTable::AsciiOnly => match self.category() { + Null => '⋄', + AsciiPrintable => self.0 as char, + AsciiWhitespace if self.0 == 0x20 => ' ', + AsciiWhitespace => '_', + AsciiOther => '•', + NonAscii => '×', + }, + CharacterTable::CP437 => CP437[self.0 as usize], } } } @@ -167,6 +167,7 @@ pub struct PrinterBuilder<'a, Writer: Write> { group_size: u8, base: Base, endianness: Endianness, + character_table: CharacterTable, } impl<'a, Writer: Write> PrinterBuilder<'a, Writer> { @@ -182,6 +183,7 @@ impl<'a, Writer: Write> PrinterBuilder<'a, Writer> { group_size: 1, base: Base::Hexadecimal, endianness: Endianness::Big, + character_table: CharacterTable::AsciiOnly, } } @@ -230,6 +232,11 @@ impl<'a, Writer: Write> PrinterBuilder<'a, Writer> { self } + pub fn character_table(mut self, character_table: CharacterTable) -> Self { + self.character_table = character_table; + self + } + pub fn build(self) -> Printer<'a, Writer> { Printer::new( 
self.writer, @@ -242,6 +249,7 @@ impl<'a, Writer: Write> PrinterBuilder<'a, Writer> { self.group_size, self.base, self.endianness, + self.character_table, ) } } @@ -271,6 +279,8 @@ pub struct Printer<'a, Writer: Write> { base_digits: u8, /// Whether to show groups in little or big endian format. endianness: Endianness, + /// The character table to reference for the character panel. + character_table: CharacterTable, } impl<'a, Writer: Write> Printer<'a, Writer> { @@ -285,6 +295,7 @@ impl<'a, Writer: Write> Printer<'a, Writer> { group_size: u8, base: Base, endianness: Endianness, + character_table: CharacterTable, ) -> Printer<'a, Writer> { Printer { idx: 0, @@ -304,7 +315,7 @@ impl<'a, Writer: Write> Printer<'a, Writer> { }) .collect(), byte_char_panel: (0u8..=u8::MAX) - .map(|i| format!("{}", Byte(i).as_char())) + .map(|i| format!("{}", Byte(i).as_char(character_table))) .collect(), byte_hex_panel_g: (0u8..=u8::MAX).map(|i| format!("{i:02x}")).collect(), squeezer: if use_squeeze { @@ -323,6 +334,7 @@ impl<'a, Writer: Write> Printer<'a, Writer> { Base::Hexadecimal => 2, }, endianness, + character_table, } } @@ -401,8 +413,7 @@ impl<'a, Writer: Write> Printer<'a, Writer> { if self.show_position_panel { match self.squeezer { Squeezer::Print => { - self.writer - .write_all(self.byte_char_panel[b'*' as usize].as_bytes())?; + self.writer.write_all(&[b'*'])?; if self.show_color { self.writer.write_all(COLOR_RESET)?; } @@ -732,6 +743,7 @@ mod tests { 1, Base::Hexadecimal, Endianness::Big, + CharacterTable::AsciiOnly, ); printer.print_all(input).unwrap(); @@ -787,6 +799,7 @@ mod tests { 1, Base::Hexadecimal, Endianness::Big, + CharacterTable::AsciiOnly, ); printer.display_offset(0xdeadbeef); @@ -821,6 +834,7 @@ mod tests { 1, Base::Hexadecimal, Endianness::Big, + CharacterTable::AsciiOnly, ); printer.print_all(input).unwrap(); @@ -881,6 +895,7 @@ mod tests { 1, Base::Hexadecimal, Endianness::Big, + CharacterTable::AsciiOnly, ); printer.print_all(input).unwrap(); diff --git 
a/src/main.rs b/src/main.rs index 5e022f30..2d1d8d7e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -17,7 +17,7 @@ use thiserror::Error as ThisError; use terminal_size::terminal_size; -use hexyl::{Base, BorderStyle, Endianness, Input, PrinterBuilder}; +use hexyl::{Base, BorderStyle, CharacterTable, Endianness, Input, PrinterBuilder}; #[cfg(test)] mod tests; @@ -212,6 +212,18 @@ fn run() -> Result<()> { .hide(true) .help("An alias for '--endianness=little'."), ) + .arg( + Arg::new("character-table") + .long("character-table") + .value_name("FORMAT") + .value_parser(["codepage-437", "ascii-only"]) + .default_value("ascii-only") + .help( + "The character table that should be used. 'ascii-only' \ + will show dots for non-ASCII characters, 'codepage-437' \ + will use Code page 437 for those characters." + ), + ) .arg( Arg::new("base") .short('b') @@ -469,6 +481,17 @@ fn run() -> Result<()> { ("big", _) => Endianness::Big, _ => unreachable!(), }; + + let character_table = match matches + .get_one::<String>("character-table") + .unwrap() + .as_ref() + { + "ascii-only" => CharacterTable::AsciiOnly, + "codepage-437" => CharacterTable::CP437, + _ => unreachable!(), + }; + let stdout = io::stdout(); let mut stdout_lock = BufWriter::new(stdout.lock()); @@ -482,6 +505,7 @@ fn run() -> Result<()> { .group_size(group_size) .with_base(base) .endianness(endianness) + .character_table(character_table) .build(); printer.display_offset(skip_offset + display_offset); printer.print_all(&mut reader).map_err(|e| anyhow!(e))?; diff --git a/tests/integration_tests.rs b/tests/integration_tests.rs index fc10dfaa..a44c77c0 100644 --- a/tests/integration_tests.rs +++ b/tests/integration_tests.rs @@ -618,3 +618,62 @@ mod base { ); } } + +mod character_table { + use super::hexyl; + use super::PrettyAssert; + + #[test] + fn codepage_437() { + hexyl() + .arg("hello_world_elf64") + .arg("--color=never") + .arg("--character-table=codepage-437") + .assert() + .success() + .pretty_stdout( + 
"┌────────┬─────────────────────────┬─────────────────────────┬────────┬────────┐ +│00000000│ 7f 45 4c 46 02 01 01 00 ┊ 00 00 00 00 00 00 00 00 │⌂ELF☻☺☺⋄┊⋄⋄⋄⋄⋄⋄⋄⋄│ +│00000010│ 02 00 3e 00 01 00 00 00 ┊ 00 10 40 00 00 00 00 00 │☻⋄>⋄☺⋄⋄⋄┊⋄►@⋄⋄⋄⋄⋄│ +│00000020│ 40 00 00 00 00 00 00 00 ┊ 28 20 00 00 00 00 00 00 │@⋄⋄⋄⋄⋄⋄⋄┊( ⋄⋄⋄⋄⋄⋄│ +│00000030│ 00 00 00 00 40 00 38 00 ┊ 03 00 40 00 04 00 03 00 │⋄⋄⋄⋄@⋄8⋄┊♥⋄@⋄♦⋄♥⋄│ +│00000040│ 01 00 00 00 04 00 00 00 ┊ 00 00 00 00 00 00 00 00 │☺⋄⋄⋄♦⋄⋄⋄┊⋄⋄⋄⋄⋄⋄⋄⋄│ +│00000050│ 00 00 40 00 00 00 00 00 ┊ 00 00 40 00 00 00 00 00 │⋄⋄@⋄⋄⋄⋄⋄┊⋄⋄@⋄⋄⋄⋄⋄│ +│00000060│ e8 00 00 00 00 00 00 00 ┊ e8 00 00 00 00 00 00 00 │Φ⋄⋄⋄⋄⋄⋄⋄┊Φ⋄⋄⋄⋄⋄⋄⋄│ +│00000070│ 00 10 00 00 00 00 00 00 ┊ 01 00 00 00 05 00 00 00 │⋄►⋄⋄⋄⋄⋄⋄┊☺⋄⋄⋄♣⋄⋄⋄│ +│00000080│ 00 10 00 00 00 00 00 00 ┊ 00 10 40 00 00 00 00 00 │⋄►⋄⋄⋄⋄⋄⋄┊⋄►@⋄⋄⋄⋄⋄│ +│00000090│ 00 10 40 00 00 00 00 00 ┊ 1d 00 00 00 00 00 00 00 │⋄►@⋄⋄⋄⋄⋄┊↔⋄⋄⋄⋄⋄⋄⋄│ +│000000a0│ 1d 00 00 00 00 00 00 00 ┊ 00 10 00 00 00 00 00 00 │↔⋄⋄⋄⋄⋄⋄⋄┊⋄►⋄⋄⋄⋄⋄⋄│ +│000000b0│ 01 00 00 00 06 00 00 00 ┊ 00 20 00 00 00 00 00 00 │☺⋄⋄⋄♠⋄⋄⋄┊⋄ ⋄⋄⋄⋄⋄⋄│ +│000000c0│ 00 20 40 00 00 00 00 00 ┊ 00 20 40 00 00 00 00 00 │⋄ @⋄⋄⋄⋄⋄┊⋄ @⋄⋄⋄⋄⋄│ +│000000d0│ 0e 00 00 00 00 00 00 00 ┊ 0e 00 00 00 00 00 00 00 │♫⋄⋄⋄⋄⋄⋄⋄┊♫⋄⋄⋄⋄⋄⋄⋄│ +│000000e0│ 00 10 00 00 00 00 00 00 ┊ 00 00 00 00 00 00 00 00 │⋄►⋄⋄⋄⋄⋄⋄┊⋄⋄⋄⋄⋄⋄⋄⋄│ +│000000f0│ 00 00 00 00 00 00 00 00 ┊ 00 00 00 00 00 00 00 00 │⋄⋄⋄⋄⋄⋄⋄⋄┊⋄⋄⋄⋄⋄⋄⋄⋄│ +│* │ ┊ │ ┊ │ +│00001000│ ba 0e 00 00 00 b9 00 20 ┊ 40 00 bb 01 00 00 00 b8 │║♫⋄⋄⋄╣⋄ ┊@⋄╗☺⋄⋄⋄╕│ +│00001010│ 04 00 00 00 cd 80 b8 01 ┊ 00 00 00 cd 80 00 00 00 │♦⋄⋄⋄═Ç╕☺┊⋄⋄⋄═Ç⋄⋄⋄│ +│00001020│ 00 00 00 00 00 00 00 00 ┊ 00 00 00 00 00 00 00 00 │⋄⋄⋄⋄⋄⋄⋄⋄┊⋄⋄⋄⋄⋄⋄⋄⋄│ +│* │ ┊ │ ┊ │ +│00002000│ 48 65 6c 6c 6f 2c 20 77 ┊ 6f 72 6c 64 21 0a 00 2e │Hello, w┊orld!◙⋄.│ +│00002010│ 73 68 73 74 72 74 61 62 ┊ 00 2e 74 65 78 74 00 2e │shstrtab┊⋄.text⋄.│ +│00002020│ 64 61 74 61 00 00 00 00 ┊ 00 00 00 00 00 00 00 00 │data⋄⋄⋄⋄┊⋄⋄⋄⋄⋄⋄⋄⋄│ +│00002030│ 00 00 00 00 00 00 00 00 ┊ 00 00 00 00 00 00 00 00 │⋄⋄⋄⋄⋄⋄⋄⋄┊⋄⋄⋄⋄⋄⋄⋄⋄│ +│* 
│ ┊ │ ┊ │ +│00002060│ 00 00 00 00 00 00 00 00 ┊ 0b 00 00 00 01 00 00 00 │⋄⋄⋄⋄⋄⋄⋄⋄┊♂⋄⋄⋄☺⋄⋄⋄│ +│00002070│ 06 00 00 00 00 00 00 00 ┊ 00 10 40 00 00 00 00 00 │♠⋄⋄⋄⋄⋄⋄⋄┊⋄►@⋄⋄⋄⋄⋄│ +│00002080│ 00 10 00 00 00 00 00 00 ┊ 1d 00 00 00 00 00 00 00 │⋄►⋄⋄⋄⋄⋄⋄┊↔⋄⋄⋄⋄⋄⋄⋄│ +│00002090│ 00 00 00 00 00 00 00 00 ┊ 10 00 00 00 00 00 00 00 │⋄⋄⋄⋄⋄⋄⋄⋄┊►⋄⋄⋄⋄⋄⋄⋄│ +│000020a0│ 00 00 00 00 00 00 00 00 ┊ 11 00 00 00 01 00 00 00 │⋄⋄⋄⋄⋄⋄⋄⋄┊◄⋄⋄⋄☺⋄⋄⋄│ +│000020b0│ 03 00 00 00 00 00 00 00 ┊ 00 20 40 00 00 00 00 00 │♥⋄⋄⋄⋄⋄⋄⋄┊⋄ @⋄⋄⋄⋄⋄│ +│000020c0│ 00 20 00 00 00 00 00 00 ┊ 0e 00 00 00 00 00 00 00 │⋄ ⋄⋄⋄⋄⋄⋄┊♫⋄⋄⋄⋄⋄⋄⋄│ +│000020d0│ 00 00 00 00 00 00 00 00 ┊ 04 00 00 00 00 00 00 00 │⋄⋄⋄⋄⋄⋄⋄⋄┊♦⋄⋄⋄⋄⋄⋄⋄│ +│000020e0│ 00 00 00 00 00 00 00 00 ┊ 01 00 00 00 03 00 00 00 │⋄⋄⋄⋄⋄⋄⋄⋄┊☺⋄⋄⋄♥⋄⋄⋄│ +│000020f0│ 00 00 00 00 00 00 00 00 ┊ 00 00 00 00 00 00 00 00 │⋄⋄⋄⋄⋄⋄⋄⋄┊⋄⋄⋄⋄⋄⋄⋄⋄│ +│00002100│ 0e 20 00 00 00 00 00 00 ┊ 17 00 00 00 00 00 00 00 │♫ ⋄⋄⋄⋄⋄⋄┊↨⋄⋄⋄⋄⋄⋄⋄│ +│00002110│ 00 00 00 00 00 00 00 00 ┊ 01 00 00 00 00 00 00 00 │⋄⋄⋄⋄⋄⋄⋄⋄┊☺⋄⋄⋄⋄⋄⋄⋄│ +│00002120│ 00 00 00 00 00 00 00 00 ┊ │⋄⋄⋄⋄⋄⋄⋄⋄┊ │ +└────────┴─────────────────────────┴─────────────────────────┴────────┴────────┘ +", + ); + } +}