From ceb90ce80281ef5a9e4ba25b802099bc85bc7e2b Mon Sep 17 00:00:00 2001 From: Marc Scholten Date: Fri, 5 Jun 2026 17:19:12 +0200 Subject: [PATCH] Stream IMAP fetch literals --- HaskellNet.cabal | 1 + src/Network/HaskellNet/IMAP.hs | 371 ++++++++++++++++++++++++++++++--- test/IMAPParsersTest.hs | 73 +++++++ 3 files changed, 417 insertions(+), 28 deletions(-) diff --git a/HaskellNet.cabal b/HaskellNet.cabal index 657309e..106069c 100644 --- a/HaskellNet.cabal +++ b/HaskellNet.cabal @@ -95,5 +95,6 @@ Test-suite imap-parsers default-language: Haskell2010 Build-Depends: base >= 4.3 && < 4.22, + bytestring >=0.10.2 && < 0.13, HaskellNet, HUnit >= 1.6 && < 1.7 diff --git a/src/Network/HaskellNet/IMAP.hs b/src/Network/HaskellNet/IMAP.hs index 69af0bd..76e27ee 100644 --- a/src/Network/HaskellNet/IMAP.hs +++ b/src/Network/HaskellNet/IMAP.hs @@ -16,6 +16,7 @@ module Network.HaskellNet.IMAP -- * fetch commands , fetch, fetchHeader, fetchSize, fetchHeaderFields, fetchHeaderFieldsNot , fetchFlags, fetchR, fetchByString, fetchByStringR + , fetchByByteString, fetchByByteStringR , fetchPeek, fetchRPeek -- * other types , Flag(..), Attribute(..), MailboxStatus(..) @@ -141,11 +142,16 @@ connectStream s = -- normal send commands sendCommand' :: IMAPConnection -> String -> IO (ByteString, Int) sendCommand' c cmdstr = do - (_, num) <- withNextCommandNum c $ \num -> bsPutCrLf (stream c) $ - BS.pack $ show6 num ++ " " ++ cmdstr + num <- sendCommandNoResponse c cmdstr resp <- getResponse (stream c) return (resp, num) +sendCommandNoResponse :: IMAPConnection -> String -> IO Int +sendCommandNoResponse c cmdstr = do + (_, num) <- withNextCommandNum c $ \num -> bsPutCrLf (stream c) $ + BS.pack $ show6 num ++ " " ++ cmdstr + return num + show6 :: (Ord a, Num a, Show a) => a -> String show6 n | n > 100000 = show n | n > 10000 = '0' : show n @@ -372,76 +378,371 @@ searchCharset conn charset queries = fetch :: IMAPConnection -> UID -> IO ByteString fetch conn uid = - do lst <- fetchByString conn uid "BODY[]" - return $ maybe BS.empty BS.pack $ lookup' "BODY[]" lst + do lst <- fetchByByteString conn uid "BODY[]" + return $ fromMaybe BS.empty $ lookup' "BODY[]" lst -- | Like 'fetch' but without marking the email as seen/read fetchPeek :: IMAPConnection -> UID -> IO ByteString fetchPeek conn uid = - do lst <- fetchByString conn uid "BODY.PEEK[]" - return $ maybe BS.empty BS.pack $ lookup' "BODY[]" lst + do lst <- fetchByByteString conn uid "BODY.PEEK[]" + return $ fromMaybe BS.empty $ lookup' "BODY[]" lst fetchHeader :: IMAPConnection -> UID -> IO ByteString fetchHeader conn uid = - do lst <- fetchByString conn uid "BODY[HEADER]" - return $ maybe BS.empty BS.pack $ lookup' "BODY[HEADER]" lst + do lst <- fetchByByteString conn uid "BODY[HEADER]" + return $ fromMaybe BS.empty $ lookup' "BODY[HEADER]" lst fetchSize :: IMAPConnection -> UID -> IO Int fetchSize conn uid = - do lst <- fetchByString conn uid "RFC822.SIZE" - return $ maybe 0 read $ lookup "RFC822.SIZE" lst + do lst <- fetchByByteString conn uid "RFC822.SIZE" + return $ maybe 0 (read . BS.unpack) $ lookup "RFC822.SIZE" lst fetchHeaderFields :: IMAPConnection -> UID -> [String] -> IO ByteString fetchHeaderFields conn uid hs = do let fetchCmd = "BODY[HEADER.FIELDS ("++unwords hs++")]" - lst <- fetchByString conn uid fetchCmd - return $ maybe BS.empty BS.pack $ lookup' fetchCmd lst + lst <- fetchByByteString conn uid fetchCmd + return $ fromMaybe BS.empty $ lookup' fetchCmd lst fetchHeaderFieldsNot :: IMAPConnection -> UID -> [String] -> IO ByteString fetchHeaderFieldsNot conn uid hs = do let fetchCmd = "BODY[HEADER.FIELDS.NOT ("++unwords hs++")]" - lst <- fetchByString conn uid fetchCmd - return $ maybe BS.empty BS.pack $ lookup' fetchCmd lst + lst <- fetchByByteString conn uid fetchCmd + return $ fromMaybe BS.empty $ lookup' fetchCmd lst fetchFlags :: IMAPConnection -> UID -> IO [Flag] fetchFlags conn uid = - do lst <- fetchByString conn uid "FLAGS" + do lst <- fetchByByteString conn uid "FLAGS" return $ getFlags $ lookup "FLAGS" lst where getFlags Nothing = [] - getFlags (Just s) = eval' dvFlags "" s + getFlags (Just s) = eval' dvFlags "" (BS.unpack s) fetchR :: IMAPConnection -> (UID, UID) -> IO [(UID, ByteString)] fetchR conn r = - do lst <- fetchByStringR conn r "BODY[]" - return $ map (\(uid, vs) -> (uid, maybe BS.empty BS.pack $ + do lst <- fetchByByteStringR conn r "BODY[]" + return $ map (\(uid, vs) -> (uid, fromMaybe BS.empty $ lookup' "BODY[]" vs)) lst -- | Like 'fetchR' but without marking the email as seen/read fetchRPeek :: IMAPConnection -> (UID, UID) -> IO [(UID, ByteString)] fetchRPeek conn range = - do ls <- fetchByStringR conn range "BODY.PEEK[]" - return $ map (\(uid, vs) -> (uid, maybe BS.empty BS.pack $ lookup' "BODY[]" vs)) ls + do ls <- fetchByByteStringR conn range "BODY.PEEK[]" + return $ map (\(uid, vs) -> (uid, fromMaybe BS.empty $ lookup' "BODY[]" vs)) ls fetchByString :: IMAPConnection -> UID -> String -> IO [(String, String)] fetchByString conn uid command = - do lst <- fetchCommand conn ("UID FETCH "++show uid++" "++command) id - return $ snd $ head lst + map (\(key, value) -> (key, BS.unpack value)) <$> fetchByByteString conn uid command + +fetchByByteString :: IMAPConnection -> UID -> String + -> IO [(String, ByteString)] +fetchByByteString conn uid command = + do lst <- fetchCommandBS conn ("UID FETCH "++show uid++" "++command) id + case lst of + (_, pairs):_ -> return pairs + [] -> return [] fetchByStringR :: IMAPConnection -> (UID, UID) -> String -> IO [(UID, [(String, String)])] fetchByStringR conn (s, e) command = - fetchCommand conn ("UID FETCH "++show s++":"++show e++" "++command) proc + map unpackFetch <$> fetchByByteStringR conn (s, e) command + where unpackFetch (uid, pairs) = + (uid, map (\(key, value) -> (key, BS.unpack value)) pairs) + +fetchByByteStringR :: IMAPConnection -> (UID, UID) -> String + -> IO [(UID, [(String, ByteString)])] +fetchByByteStringR conn (s, e) command = + fetchCommandBS conn ("UID FETCH "++show s++":"++show e++" "++command) proc where proc (n, ps) = - (maybe (toEnum (fromIntegral n)) read (lookup' "UID" ps), ps) + (maybe (toEnum (fromIntegral n)) (read . BS.unpack) (lookup' "UID" ps), ps) fetchCommand :: IMAPConnection -> String -> ((Integer, [(String, String)]) -> b) -> IO [b] fetchCommand conn command proc = - (map proc) <$> sendCommand conn command pFetch + fetchCommandBS conn command $ \(n, ps) -> + proc (n, map (\(key, value) -> (key, BS.unpack value)) ps) + +fetchCommandBS :: IMAPConnection -> String + -> ((Integer, [(String, ByteString)]) -> b) -> IO [b] +fetchCommandBS conn command proc = + do num <- sendCommandNoResponse conn command + (resp, mboxUp, values) <- getFetchResponseBS (stream conn) (show6 num) + case resp of + OK _ _ -> do mboxUpdate conn mboxUp + return $ map proc values + NO _ msg -> fail ("NO: " ++ msg) + BAD _ msg -> fail ("BAD: " ++ msg) + PREAUTH _ msg -> fail ("preauth: " ++ msg) + +getFetchResponseBS :: BSStream -> String + -> IO (ServerResponse, MboxUpdate, [(Integer, [(String, ByteString)])]) +getFetchResponseBS s tag = go Nothing Nothing [] + where + go exists' recent' fetches = do + line <- strip <$> bsGetLine s + case parseTaggedOrFatalLineBS tag line of + Just resp -> return (resp, MboxUpdate exists' recent', reverse fetches) + Nothing -> + case parseFetchLineStartBS line of + Just fetchInput -> do + fetchLine <- parseFetchLineBS s fetchInput + go exists' recent' (fetchLine:fetches) + Nothing -> + case parseNumberedUpdateLineBS "EXISTS" line of + Just n -> go (Just n) recent' fetches + Nothing -> + case parseNumberedUpdateLineBS "RECENT" line of + Just n -> go exists' (Just n) fetches + Nothing -> do + skipLineLiteralsBS s line + go exists' recent' fetches + +parseTaggedOrFatalLineBS :: String -> ByteString -> Maybe ServerResponse +parseTaggedOrFatalLineBS tag line = + if isTaggedLine line || isFatalLine line + then let (resp, _, ()) = eval pNone tag (BS.append line crlf) + in Just resp + else Nothing + where + isTaggedLine = BS.isPrefixOf (BS.pack $ tag ++ " ") + isFatalLine input = + case stripPrefixBS (BS.pack "* ") input of + Just rest -> startsWithCIBS (BS.pack "BYE") rest + Nothing -> False + +parseFetchLineStartBS :: ByteString -> Maybe (Integer, ByteString) +parseFetchLineStartBS input = + do rest1 <- stripPrefixBS (BS.pack "* ") input + (numBytes, rest2) <- span1BS isDigit rest1 + rest3 <- stripSpaces1BS rest2 + rest4 <- stripPrefixCIBS (BS.pack "FETCH") rest3 + rest5 <- stripCharBS '(' $ dropSpacesBS rest4 + return (read $ BS.unpack numBytes, rest5) + +parseFetchLineBS :: BSStream -> (Integer, ByteString) + -> IO (Integer, [(String, ByteString)]) +parseFetchLineBS s (num, input) = + do pairs <- parseFetchPairsBS s [] input + return (num, pairs) + +parseFetchPairsBS :: BSStream -> [(String, ByteString)] -> ByteString + -> IO [(String, ByteString)] +parseFetchPairsBS s pairs input = + case BS.uncons input of + Just (')', rest) | BS.null (dropSpacesBS rest) -> return $ reverse pairs + _ -> case parseFetchKeyBS input of + Just (key, rest1) -> do + valueResult <- parseFetchValueBS s rest1 + case valueResult of + Just (value, rest2) -> + parseFetchPairsBS s ((key, value):pairs) $ dropSpacesBS rest2 + Nothing -> fetchParseError "cannot parse FETCH value" input + Nothing -> fetchParseError "cannot parse FETCH key" input + +parseFetchKeyBS :: ByteString -> Maybe (String, ByteString) +parseFetchKeyBS input = + let (name, rest1) = BS.span isFetchKeyChar input + in if BS.null name + then Nothing + else do (section, rest2) <- parseFetchSectionBS rest1 + rest3 <- stripSpaces1BS rest2 + return (map toUpper (BS.unpack name) ++ BS.unpack section, rest3) + where + isFetchKeyChar c = not $ c `elem` " [)\r\n" + +parseFetchSectionBS :: ByteString -> Maybe (ByteString, ByteString) +parseFetchSectionBS input = + case BS.uncons input of + Just ('[', rest1) -> + let (sectionBody, rest2) = BS.break (== ']') rest1 + in case BS.uncons rest2 of + Just (']', rest3) -> + do (origin, rest4) <- parseFetchOriginBS rest3 + return (BS.concat [BS.pack "[", sectionBody, BS.pack "]", origin], rest4) + _ -> Nothing + _ -> Just (BS.empty, input) + +parseFetchOriginBS :: ByteString -> Maybe (ByteString, ByteString) +parseFetchOriginBS input = + case BS.uncons input of + Just ('<', rest1) -> + do (digits, rest2) <- span1BS isDigit rest1 + rest3 <- stripCharBS '>' rest2 + return (BS.concat [BS.pack "<", digits, BS.pack ">"], rest3) + _ -> Just (BS.empty, input) + +parseFetchValueBS :: BSStream -> ByteString -> IO (Maybe (ByteString, ByteString)) +parseFetchValueBS s input = + case BS.uncons input of + Just ('(', _) -> return $ parseParenValueBS input + Just ('{', _) -> Just <$> parseLiteralValueBS s input + Just ('~', rest) | BS.take 1 rest == BS.pack "{" -> + Just <$> parseLiteralValueBS s input + Just ('"', _) -> return $ parseQuotedValueBS input + _ -> return $ parseAtomValueBS input + +parseParenValueBS :: ByteString -> Maybe (ByteString, ByteString) +parseParenValueBS input = + do valueLen <- scanParenValueEndBS input + let (value, rest) = BS.splitAt valueLen input + return (value, rest) + +parseLiteralValueBS :: BSStream -> ByteString -> IO (ByteString, ByteString) +parseLiteralValueBS s input = + case parseLiteralMarkerBS input of + Just literalLen -> do + literal <- bsGet s literalLen + if BS.length literal /= literalLen + then fetchParseError "short FETCH literal" input + else do tailLine <- strip <$> bsGetLine s + return (literal, tailLine) + Nothing -> fetchParseError "cannot parse FETCH literal marker" input + +parseLiteralMarkerBS :: ByteString -> Maybe Int +parseLiteralMarkerBS input = + do rest1 <- case BS.uncons input of + Just ('~', rest) -> Just rest + _ -> Just input + rest2 <- stripCharBS '{' rest1 + (lenBytes, rest3) <- span1BS isDigit rest2 + let rest4 = case BS.uncons rest3 of + Just ('+', rest) -> rest + _ -> rest3 + rest5 <- stripCharBS '}' rest4 + if BS.null (dropSpacesBS rest5) + then Just $ read $ BS.unpack lenBytes + else Nothing + +skipLineLiteralsBS :: BSStream -> ByteString -> IO () +skipLineLiteralsBS s line = + case literalLengthAtLineEndBS line of + Just literalLen -> do + _ <- bsGet s literalLen + nextLine <- strip <$> bsGetLine s + skipLineLiteralsBS s nextLine + Nothing -> return () + +literalLengthAtLineEndBS :: ByteString -> Maybe Int +literalLengthAtLineEndBS line = + let stripped = strip line + in if BS.length stripped >= 3 && BS.last stripped == '}' + then parseLiteralTailBS $ reverse $ BS.unpack $ BS.init stripped + else Nothing + where + parseLiteralTailBS revBeforeClose = + case break (== '{') revBeforeClose of + (insideRev, _ : _) -> parseLiteralInsideBS $ reverse insideRev + _ -> Nothing + parseLiteralInsideBS inside = + let digits' = case reverse inside of + '+' : rest -> reverse rest + _ -> inside + in if not (null digits') && all isDigit digits' + then Just $ read digits' + else Nothing + +parseQuotedValueBS :: ByteString -> Maybe (ByteString, ByteString) +parseQuotedValueBS input = + do valueLen <- scanQuotedValueEndBS 1 input + let (value, rest) = BS.splitAt valueLen input + return (value, rest) + +parseAtomValueBS :: ByteString -> Maybe (ByteString, ByteString) +parseAtomValueBS input = + let (value, rest) = BS.span isAtomValueChar input + in if BS.null value + then Nothing + else let normalized = if BS.map toUpper value == BS.pack "NIL" + then BS.empty + else value + in Just (normalized, rest) + where + isAtomValueChar c = not $ c `elem` " (){%*\"\\]\r\n" + +scanParenValueEndBS :: ByteString -> Maybe Int +scanParenValueEndBS input = + case BS.uncons input of + Just ('(', _) -> go 0 0 + _ -> Nothing + where + inputLen = BS.length input + go :: Int -> Int -> Maybe Int + go i depth + | i >= inputLen = Nothing + | otherwise = + case BS.index input i of + '"' -> do next <- scanQuotedValueEndBS (i + 1) input + go next depth + '(' -> go (i + 1) (depth + 1) + ')' | depth == 1 -> Just (i + 1) + | depth > 1 -> go (i + 1) (depth - 1) + | otherwise -> Nothing + _ -> go (i + 1) depth + +scanQuotedValueEndBS :: Int -> ByteString -> Maybe Int +scanQuotedValueEndBS = go + where + go i input + | i >= BS.length input = Nothing + | otherwise = + case BS.index input i of + '\\' -> go (i + 2) input + '"' -> Just (i + 1) + _ -> go (i + 1) input + +parseNumberedUpdateLineBS :: String -> ByteString -> Maybe Integer +parseNumberedUpdateLineBS name input = + do rest1 <- stripPrefixBS (BS.pack "* ") input + (numBytes, rest2) <- span1BS isDigit rest1 + rest3 <- stripSpaces1BS rest2 + rest4 <- stripPrefixCIBS (BS.pack name) rest3 + if BS.null (dropSpacesBS rest4) + then Just $ read $ BS.unpack numBytes + else Nothing + +stripPrefixBS :: ByteString -> ByteString -> Maybe ByteString +stripPrefixBS prefix input = + if prefix `BS.isPrefixOf` input + then Just $ BS.drop (BS.length prefix) input + else Nothing + +stripPrefixCIBS :: ByteString -> ByteString -> Maybe ByteString +stripPrefixCIBS prefix input = + if startsWithCIBS prefix input + then Just $ BS.drop (BS.length prefix) input + else Nothing + +startsWithCIBS :: ByteString -> ByteString -> Bool +startsWithCIBS prefix input = + let prefixLen = BS.length prefix + candidate = BS.take prefixLen input + in BS.length candidate == prefixLen + && BS.map toUpper candidate == BS.map toUpper prefix + +stripCharBS :: Char -> ByteString -> Maybe ByteString +stripCharBS expected input = + case BS.uncons input of + Just (actual, rest) | actual == expected -> Just rest + _ -> Nothing + +span1BS :: (Char -> Bool) -> ByteString -> Maybe (ByteString, ByteString) +span1BS predicate input = + let result@(matching, _) = BS.span predicate input + in if BS.null matching then Nothing else Just result + +dropSpacesBS :: ByteString -> ByteString +dropSpacesBS = BS.dropWhile (== ' ') + +stripSpaces1BS :: ByteString -> Maybe ByteString +stripSpaces1BS input = + let rest = dropSpacesBS input + in if BS.length rest == BS.length input then Nothing else Just rest + +fetchParseError :: String -> ByteString -> a +fetchParseError message input = + error $ message ++ ": " ++ show (BS.take 120 input) storeFull :: IMAPConnection -> String -> FlagsQuery -> Bool -> IO [(UID, [Flag])] @@ -537,10 +838,24 @@ bsPutCrLf h s = bsPut h s >> bsPut h crlf >> bsFlush h lookup' :: String -> [(String, b)] -> Maybe b lookup' _ [] = Nothing -lookup' q ((k,v):xs) | q == query k = return v +lookup' q ((k,v):xs) | matchesFetchKey q k = return v | otherwise = lookup' q xs - where - query = unwords . drop 2 . words + +matchesFetchKey :: String -> String -> Bool +matchesFetchKey expected actual = + expected == actual || normalizeFetchKey expected == normalizeFetchKey actual + +normalizeFetchKey :: String -> String +normalizeFetchKey = stripOrigin . stripPeek . map toUpper + where + stripPeek key = + case stripPrefix "BODY.PEEK[" key of + Just rest -> "BODY[" ++ rest + Nothing -> key + stripOrigin key = + case break (== '<') key of + (bodySection, '<':_) | "BODY[" `isPrefixOf` bodySection -> bodySection + _ -> key -- TODO: This is just a first trial solution for this stack overflow question: -- http://stackoverflow.com/questions/26183675/error-when-fetching-subject-from-email-using-haskellnets-imap diff --git a/test/IMAPParsersTest.hs b/test/IMAPParsersTest.hs index eb6890e..95a2237 100644 --- a/test/IMAPParsersTest.hs +++ b/test/IMAPParsersTest.hs @@ -1,5 +1,12 @@ module Main (main) where +import Data.ByteString (ByteString) +import qualified Data.ByteString as B +import qualified Data.ByteString.Char8 as BS +import Data.IORef +import Network.HaskellNet.BSStream +import qualified Network.HaskellNet.IMAP as IMAP +import Network.HaskellNet.IMAP.Connection import Network.HaskellNet.IMAP.Parsers import Network.HaskellNet.IMAP.Types @@ -7,6 +14,49 @@ import System.Exit import Test.HUnit +data ReadStep = ReadLine ByteString | ReadBytes ByteString + +scriptedStream :: [ReadStep] -> IO BSStream +scriptedStream steps = do + input <- newIORef steps + return BSStream + { bsGetLine = popLine input + , bsGet = popBytes input + , bsPut = \_ -> return () + , bsFlush = return () + , bsClose = return () + , bsIsOpen = return True + , bsWaitForInput = \_ -> return False + } + where + popLine input = do + steps' <- readIORef input + case steps' of + ReadLine line : rest -> writeIORef input rest >> return line + ReadBytes _ : _ -> assertFailure "expected test stream line, got bytes" + [] -> assertFailure "test stream exhausted while reading a line" + + popBytes input n = do + steps' <- readIORef input + case steps' of + ReadBytes bytes : rest -> + let (chunk, remainder) = BS.splitAt n bytes + next = if BS.null remainder then rest else ReadBytes remainder : rest + in writeIORef input next >> return chunk + ReadLine _ : _ -> assertFailure "expected test stream bytes, got a line" + [] -> assertFailure "test stream exhausted while reading bytes" + +scriptedConnection :: [ReadStep] -> IO IMAPConnection +scriptedConnection steps = do + testStream <- scriptedStream steps + newConnection testStream + +line :: String -> ReadStep +line = ReadLine . BS.pack + +okLine :: String -> ReadStep +okLine = line . ("000000 OK " ++) + baseTest = [(OK Nothing "LOGIN Completed", MboxUpdate Nothing Nothing, ()) ~=? eval' pNone "A001" @@ -184,6 +234,28 @@ fetchTest = \a005 OK +FLAGS completed\r\n" ] +imapFetchTest = + [ "fetchByByteString preserves raw literal bytes" ~: TestCase $ do + let body = B.pack [0, 10, 255, 65] + conn <- scriptedConnection + [ line "* 12 FETCH (BODY[] {4}" + , ReadBytes body + , line " UID 42)" + , okLine "FETCH completed" + ] + fetched <- IMAP.fetchByByteString conn 42 "BODY[]" + [("BODY[]", body), ("UID", BS.pack "42")] @=? fetched + , "fetch preserves large literals" ~: TestCase $ do + let body = BS.replicate (1024 * 1024) 'x' + conn <- scriptedConnection + [ line ("* 12 FETCH (BODY[] {" ++ show (BS.length body) ++ "}") + , ReadBytes body + , line " UID 42)" + , okLine "FETCH completed" + ] + fetched <- IMAP.fetch conn 42 + body @=? fetched + ] testData = [ "base" ~: baseTest , "capability" ~: capabilityTest @@ -194,6 +266,7 @@ testData = [ "base" ~: baseTest , "expunge" ~: expungeTest , "search" ~: searchTest , "fetch" ~: fetchTest + , "imap fetch api" ~: imapFetchTest ]