Skip to content

Commit

Permalink
fix!: utf-8: Use with-utf8 to ensure all files are read and written w…
Browse files Browse the repository at this point in the history
…ith utf8 encoding. (#1619)

May also fix #1154, #1033, #708, #536, #73: testing is needed.

This should hopefully avoid encoding issues, but since it fundamentally
alters how encoding is dealt with it may lead to unexpected outcomes.
Widespread testing on a number of different platforms would be useful.
  • Loading branch information
Xitian9 committed May 10, 2022
1 parent db26456 commit e1e184e
Show file tree
Hide file tree
Showing 33 changed files with 120 additions and 111 deletions.
2 changes: 1 addition & 1 deletion bin/hledger-balance-as-budget.hs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
{-| Construct two balance reports for two different time periods and use one of the as "budget" for
the other, thus comparing them
-}
import Data.Text.Lazy.IO as TL
import Data.Text.Lazy.IO as TL (putStrLn) -- Only putStr and friends are safe
import System.Environment (getArgs)
import Hledger.Cli

Expand Down
2 changes: 1 addition & 1 deletion bin/hledger-check-fancyassertions.hs
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ import Data.Time.Calendar (toGregorian)
import Data.Time.Calendar.OrdinalDate (mondayStartWeek, sundayStartWeek, toOrdinalDate)
import Data.Text (Text, isPrefixOf, pack, unpack)
import qualified Data.Text as T
import qualified Data.Text.IO as T
import qualified Data.Text.IO as T (putStrLn) -- Only putStr and friends are safe
import qualified Hledger.Data as H
import qualified Hledger.Query as H
import qualified Hledger.Read as H
Expand Down
2 changes: 1 addition & 1 deletion bin/hledger-combine-balances.hs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import System.Environment (getArgs)
import Hledger.Cli
import qualified Data.Map as M
import Data.Map.Merge.Strict
import qualified Data.Text.Lazy.IO as TL
import qualified Data.Text.Lazy.IO as TL (putStrLn) -- Only putStr and friends are safe

appendReports :: MultiBalanceReport -> MultiBalanceReport -> MultiBalanceReport
appendReports r1 r2 =
Expand Down
2 changes: 1 addition & 1 deletion bin/hledger-swap-dates.hs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
{-# LANGUAGE RecordWildCards #-}

import Data.String.QQ (s)
import qualified Data.Text.IO as T
import qualified Data.Text.IO as T (putStrLn) -- Only putStr and friends are safe
import Hledger
import Hledger.Cli

Expand Down
6 changes: 3 additions & 3 deletions hledger-lib/Hledger/Data/PeriodicTransaction.hs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ module Hledger.Data.PeriodicTransaction (
where

import qualified Data.Text as T
import qualified Data.Text.IO as T
import qualified Data.Text.IO as TIO (putStr) -- Only putStr and friends are safe
import Text.Printf

import Hledger.Data.Types
Expand All @@ -36,7 +36,7 @@ _ptgen str = do
case checkPeriodicTransactionStartDate i s t of
Just e -> error' e -- PARTIAL:
Nothing ->
mapM_ (T.putStr . showTransaction) $
mapM_ (TIO.putStr . showTransaction) $
runPeriodicTransaction
nullperiodictransaction{ ptperiodexpr=t , ptspan=s, ptinterval=i, ptpostings=["a" `post` usd 1] }
nulldatespan
Expand All @@ -48,7 +48,7 @@ _ptgenspan str span = do
case checkPeriodicTransactionStartDate i s t of
Just e -> error' e -- PARTIAL:
Nothing ->
mapM_ (T.putStr . showTransaction) $
mapM_ (TIO.putStr . showTransaction) $
runPeriodicTransaction
nullperiodictransaction{ ptperiodexpr=t , ptspan=s, ptinterval=i, ptpostings=["a" `post` usd 1] }
span
Expand Down
4 changes: 2 additions & 2 deletions hledger-lib/Hledger/Data/TransactionModifier.hs
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,10 @@ modifyTransactions atypes atags styles d tmods ts = do
-- Currently the only kind of modification possible is adding automated
-- postings when certain other postings are present.
--
-- >>> import qualified Data.Text.IO as T
-- >>> import qualified Data.Text.IO as TIO (putStr) -- Only putStr and friends are safe
-- >>> t = nulltransaction{tpostings=["ping" `post` usd 1]}
-- >>> tmpost acc amt = TMPostingRule (acc `post` amt) False
-- >>> test = either putStr (T.putStr.showTransaction) . fmap ($ t) . transactionModifierToFunction (const Nothing) (const []) mempty nulldate
-- >>> test = either putStr (TIO.putStr.showTransaction) . fmap ($ t) . transactionModifierToFunction (const Nothing) (const []) mempty nulldate
-- >>> test $ TransactionModifier "" ["pong" `tmpost` usd 2]
-- 0000-01-01
-- ping $1.00
Expand Down
6 changes: 3 additions & 3 deletions hledger-lib/Hledger/Read.hs
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ import Data.Ord (comparing)
import Data.Semigroup (sconcat)
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.IO as T
import Data.Text.IO.Utf8 (writeFile)
import Data.Time (Day)
import Safe (headDef)
import System.Directory (doesFileExist, getHomeDirectory)
Expand Down Expand Up @@ -232,7 +232,7 @@ ensureJournalFileExists f = do
hPutStr stderr $ "Creating hledger journal file " <> show f <> ".\n"
-- note Hledger.Utils.UTF8.* do no line ending conversion on windows,
-- we currently require unix line endings on all platforms.
newJournalContent >>= T.writeFile f
newJournalContent >>= writeFile f

-- | Does any part of this path contain non-. characters and end with a . ?
-- Such paths are not safe to use on Windows (cf #1056).
Expand All @@ -259,7 +259,7 @@ latestDates = headDef [] . take 1 . group . reverse . sort
-- | Remember that these transaction dates were the latest seen when
-- reading this journal file.
saveLatestDates :: LatestDates -> FilePath -> IO ()
saveLatestDates dates f = T.writeFile (latestDatesFileFor f) $ T.unlines $ map showDate dates
saveLatestDates dates f = writeFile (latestDatesFileFor f) $ T.unlines $ map showDate dates

-- | What were the latest transaction dates seen the last time this
-- journal file was read ? If there were multiple transactions on the
Expand Down
32 changes: 17 additions & 15 deletions hledger-lib/Hledger/Read/CsvReader.hs
Original file line number Diff line number Diff line change
Expand Up @@ -37,37 +37,39 @@ module Hledger.Read.CsvReader (
where

--- ** imports
import Prelude hiding (getContents, writeFile)
import Control.Applicative (liftA2)
import Control.Monad (unless, when)
import Control.Monad.Except (ExceptT(..), liftEither, throwError)
import qualified Control.Monad.Fail as Fail
import Control.Monad.IO.Class (MonadIO, liftIO)
import Control.Monad.State.Strict (StateT, get, modify', evalStateT)
import Control.Monad.Trans.Class (lift)
import Data.Char (toLower, isDigit, isSpace, isAlphaNum, ord)
import Data.Bifunctor (first)
import Data.Functor ((<&>))
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as BL
import Data.Char (toLower, isDigit, isSpace, isAlphaNum, ord)
import qualified Data.Csv as Cassava
import qualified Data.Csv.Parser.Megaparsec as CassavaMP
import Data.Foldable (asum, toList)
import Data.List (elemIndex, foldl', intersperse, mapAccumL, nub, sortBy)
import Data.Maybe (catMaybes, fromMaybe, isJust)
import Data.MemoUgly (memo)
import Data.Ord (comparing)
import qualified Data.Set as S
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import qualified Data.Text.IO as T
import Data.Text.Encoding (decodeUtf8, encodeUtf8)
import Data.Text.IO (getContents) -- Only putStr and friends are safe
import Data.Text.IO.Utf8 (writeFile)
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Builder as TB
import Data.Time.Calendar (Day)
import Data.Time.Format (parseTimeM, defaultTimeLocale)
import Safe (atMay, headMay, lastMay, readMay)
import System.Directory (doesFileExist)
import System.FilePath ((</>), takeDirectory, takeExtension, takeFileName)
import qualified Data.Csv as Cassava
import qualified Data.Csv.Parser.Megaparsec as CassavaMP
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as BL
import Data.Foldable (asum, toList)
import Text.Megaparsec hiding (match, parse)
import Text.Megaparsec.Char (char, newline, string)
import Text.Megaparsec.Custom (customErrorBundlePretty, parseErrorAt)
Expand Down Expand Up @@ -197,7 +199,7 @@ expandIncludes dir content = mapM (expandLine dir) (T.lines content) >>= return
where
expandLine dir line =
case line of
(T.stripPrefix "include " -> Just f) -> expandIncludes dir' =<< T.readFile f'
(T.stripPrefix "include " -> Just f) -> expandIncludes dir' =<< readFilePortably f'
where
f' = dir </> T.unpack (T.dropWhile isSpace f)
dir' = takeDirectory f'
Expand Down Expand Up @@ -745,8 +747,8 @@ readJournalFromCsv mrulesfile csvfile csvdata = do
-- than one date and the first date is more recent than the last):
-- reverse them to get same-date transactions ordered chronologically.
txns' =
(if newestfirst || mdataseemsnewestfirst == Just True
then dbg7 "reversed csv txns" . reverse else id)
(if newestfirst || mdataseemsnewestfirst == Just True
then dbg7 "reversed csv txns" . reverse else id)
txns
where
newestfirst = dbg6 "newestfirst" $ isJust $ getDirective "newest-first" rules
Expand All @@ -759,7 +761,7 @@ readJournalFromCsv mrulesfile csvfile csvdata = do

liftIO $ when (not rulesfileexists) $ do
dbg1IO "creating conversion rules file" rulesfile
T.writeFile rulesfile rulestext
writeFile rulesfile rulestext

return nulljournal{jtxns=txns''}

Expand All @@ -774,14 +776,14 @@ parseSeparator = specials . T.toLower
parseCsv :: Char -> FilePath -> Text -> ExceptT String IO CSV
parseCsv separator filePath csvdata = ExceptT $
case filePath of
"-" -> parseCassava separator "(stdin)" <$> T.getContents
"-" -> parseCassava separator "(stdin)" <$> getContents
_ -> return $ if T.null csvdata then Right mempty else parseCassava separator filePath csvdata

parseCassava :: Char -> FilePath -> Text -> Either String CSV
parseCassava separator path content =
either (Left . errorBundlePretty) (Right . parseResultToCsv) <$>
CassavaMP.decodeWith (decodeOptions separator) Cassava.NoHeader path $
BL.fromStrict $ T.encodeUtf8 content
BL.fromStrict $ encodeUtf8 content

decodeOptions :: Char -> Cassava.DecodeOptions
decodeOptions separator = Cassava.defaultDecodeOptions {
Expand All @@ -792,7 +794,7 @@ parseResultToCsv :: (Foldable t, Functor t) => t (t B.ByteString) -> CSV
parseResultToCsv = toListList . unpackFields
where
toListList = toList . fmap toList
unpackFields = (fmap . fmap) T.decodeUtf8
unpackFields = (fmap . fmap) decodeUtf8

printCSV :: CSV -> TL.Text
printCSV = TB.toLazyText . unlinesB . map printRecord
Expand Down
21 changes: 7 additions & 14 deletions hledger-lib/Hledger/Utils.hs
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,12 @@ module Hledger.Utils (---- provide these frequently used modules - or not, for c
)
where

import Control.Monad (when)
import Data.Char (toLower)
import Data.FileEmbed (makeRelativeToProject, embedStringFile)
import Data.List.Extra (foldl', foldl1', uncons, unsnoc)
import qualified Data.Set as Set
import Data.Text (Text)
import qualified Data.Text.IO as T
import qualified Data.Text.IO as TIO (hGetContents) -- Only putStr and friends are safe
import qualified Data.Text.Lazy.Builder as TB
import Data.Time.Clock (getCurrentTime)
import Data.Time.LocalTime (LocalTime, ZonedTime, getCurrentTimeZone,
Expand All @@ -48,10 +47,9 @@ import Lens.Micro ((&), (.~))
import Lens.Micro.TH (DefName(TopName), lensClass, lensField, makeLensesWith, classyRules)
import System.Console.ANSI (Color,ColorIntensity,ConsoleLayer(..), SGR(..), setSGRCode)
import System.Directory (getHomeDirectory)
import System.FilePath (isRelative, (</>))
import System.IO
(Handle, IOMode (..), hGetEncoding, hSetEncoding, hSetNewlineMode,
openFile, stdin, universalNewlineMode, utf8_bom)
import System.FilePath ((</>), isRelative)
import System.IO (Handle, IOMode (..), hSetNewlineMode, stdin, universalNewlineMode)
import qualified System.IO.Utf8 as Utf8

import Hledger.Utils.Debug
import Hledger.Utils.Parse
Expand Down Expand Up @@ -175,23 +173,18 @@ expandHomePath = \case
-- using the system locale's text encoding,
-- ignoring any utf8 BOM prefix (as seen in paypal's 2018 CSV, eg) if that encoding is utf8.
readFilePortably :: FilePath -> IO Text
readFilePortably f = openFile f ReadMode >>= readHandlePortably
readFilePortably f = Utf8.openFile f ReadMode >>= readHandlePortably

-- | Like readFilePortably, but read from standard input if the path is "-".
readFileOrStdinPortably :: String -> IO Text
readFileOrStdinPortably f = openFileOrStdin f ReadMode >>= readHandlePortably
where
openFileOrStdin :: String -> IOMode -> IO Handle
openFileOrStdin "-" _ = return stdin
openFileOrStdin f m = openFile f m
openFileOrStdin f m = Utf8.openFile f m

readHandlePortably :: Handle -> IO Text
readHandlePortably h = do
hSetNewlineMode h universalNewlineMode
menc <- hGetEncoding h
when (fmap show menc == Just "UTF-8") $ -- XXX no Eq instance, rely on Show
hSetEncoding h utf8_bom
T.hGetContents h
readHandlePortably h = hSetNewlineMode h universalNewlineMode >> TIO.hGetContents h

-- | Total version of maximum, for integral types, giving 0 for an empty list.
maximum' :: Integral a => [a] -> a
Expand Down
5 changes: 4 additions & 1 deletion hledger-lib/hledger-lib.cabal
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
cabal-version: 1.12

-- This file has been generated from package.yaml by hpack version 0.34.4.
-- This file has been generated from package.yaml by hpack version 0.34.7.
--
-- see: https://github.com/sol/hpack

Expand Down Expand Up @@ -137,6 +137,7 @@ library
, uglymemo
, unordered-containers >=0.2
, utf8-string >=0.3.5
, with-utf8 >=1.0.0
default-language: Haskell2010

test-suite doctest
Expand Down Expand Up @@ -188,6 +189,7 @@ test-suite doctest
, uglymemo
, unordered-containers >=0.2
, utf8-string >=0.3.5
, with-utf8 >=1.0.0
if impl(ghc < 9.2)
buildable: False
default-language: Haskell2010
Expand Down Expand Up @@ -241,5 +243,6 @@ test-suite unittest
, uglymemo
, unordered-containers >=0.2
, utf8-string >=0.3.5
, with-utf8 >=1.0.0
buildable: True
default-language: Haskell2010
5 changes: 3 additions & 2 deletions hledger-lib/package.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,10 @@ dependencies:
- Decimal >=0.5.1
- directory
- doclayout >=0.3 && <0.4
- extra >=1.6.3
- file-embed >=0.0.10
- filepath
- Glob >= 0.9
- hashtables >=1.2.3.1
- megaparsec >=7.0.0 && <9.3
- microlens >=0.4
Expand All @@ -70,8 +72,7 @@ dependencies:
- unordered-containers >=0.2
- uglymemo
- utf8-string >=0.3.5
- extra >=1.6.3
- Glob >= 0.9
- with-utf8 >=1.0.0
# for ledger-parse:
#- parsers >=0.5
#- system-filepath
Expand Down
4 changes: 2 additions & 2 deletions hledger/Hledger/Cli/Commands/Accounts.hs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ module Hledger.Cli.Commands.Accounts (

import Data.List
import qualified Data.Text as T
import qualified Data.Text.IO as T
import qualified Data.Text.IO as TIO (putStrLn) -- Only putStr and friends are safe
import System.Console.CmdArgs.Explicit as C

import Hledger
Expand Down Expand Up @@ -96,4 +96,4 @@ accounts CliOpts{rawopts_=rawopts, reportspec_=ReportSpec{_rsQuery=query,_rsRepo
where
spacer = T.replicate (maxwidth - T.length (showName a)) " "
maxwidth = maximum $ map (T.length . showName) clippedaccts
forM_ clippedaccts $ \a -> T.putStrLn $ showName a <> showType a
forM_ clippedaccts $ \a -> TIO.putStrLn $ showName a <> showType a
10 changes: 5 additions & 5 deletions hledger/Hledger/Cli/Commands/Add.hs
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@ import Data.List (isPrefixOf)
import Data.Maybe (fromJust, fromMaybe, isJust)
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.IO as T
import qualified Data.Text.IO as TIO (hPutStr, putStr) -- Only putStr and friends are safe
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.IO as TL
import qualified Data.Text.Lazy.IO as TLIO (putStrLn) -- Only putStr and friends are safe
import Data.Time.Calendar (Day)
import Data.Time.Format (formatTime, defaultTimeLocale)
import Lens.Micro ((^.))
Expand Down Expand Up @@ -184,7 +184,7 @@ confirmedTransactionWizard prevInput es@EntryState{..} stack@(currentStage : _)
prevInput' = prevInput{prevDescAndCmnt=Just descAndCommentString}
when (isJust mbaset) . liftIO $ do
hPutStrLn stderr "Using this similar transaction for defaults:"
T.hPutStr stderr $ showTransaction (fromJust mbaset)
TIO.hPutStr stderr $ showTransaction (fromJust mbaset)
confirmedTransactionWizard prevInput' es' ((EnterNewPosting TxnParams{txnDate=date, txnCode=code, txnDesc=desc, txnCmnt=comment} Nothing) : stack)
Nothing ->
confirmedTransactionWizard prevInput es (drop 1 stack)
Expand Down Expand Up @@ -435,7 +435,7 @@ journalAddTransaction j@Journal{jtxns=ts} opts t = do
-- unelided shows all amounts explicitly, in case there's a price, cf #283
when (debug_ opts > 0) $ do
putStrLn $ printf "\nAdded transaction to %s:" f
TL.putStrLn =<< registerFromString (showTransaction t)
TLIO.putStrLn =<< registerFromString (showTransaction t)
return j{jtxns=ts++[t]}

-- | Append a string, typically one or more transactions, to a journal
Expand All @@ -448,7 +448,7 @@ journalAddTransaction j@Journal{jtxns=ts} opts t = do
--
appendToJournalFileOrStdout :: FilePath -> Text -> IO ()
appendToJournalFileOrStdout f s
| f == "-" = T.putStr s'
| f == "-" = TIO.putStr s'
| otherwise = appendFile f $ T.unpack s'
where s' = "\n" <> ensureOneNewlineTerminated s

Expand Down
4 changes: 2 additions & 2 deletions hledger/Hledger/Cli/Commands/Checkdates.hs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ module Hledger.Cli.Commands.Checkdates (
) where

import qualified Data.Text as T
import qualified Data.Text.IO as T
import qualified Data.Text.IO as TIO (putStrLn) -- Only putStr and friends are safe
import Hledger
import Hledger.Cli.CliOptions
import System.Console.CmdArgs.Explicit
Expand Down Expand Up @@ -43,7 +43,7 @@ checkdates CliOpts{rawopts_=rawopts,reportspec_=rspec} j = do
positionstr = T.pack . showGenericSourcePos $ tsourcepos error
txn1str = linesPrepend (T.pack " ") $ showTransaction previous
txn2str = linesPrepend2 (T.pack "> ") (T.pack " ") $ showTransaction error
T.putStrLn $
TIO.putStrLn $
T.pack "Error: transaction date is out of order"
<> uniquestr <> T.pack "\nat " <> positionstr <> T.pack ":\n\n"
<> txn1str <> txn2str
Expand Down
Loading

0 comments on commit e1e184e

Please sign in to comment.