source: filezilla/trunk/fuentes/src/engine/directorylistingparser.h @ 3185

Last change on this file since 3185 was 3185, checked in by jrpelegrina, 2 years ago

Update new version: 3.15.02

File size: 4.8 KB
Line 
1#ifndef __DIRECTORYLISTINGPARSER_H__
2#define __DIRECTORYLISTINGPARSER_H__
3
/* This class is responsible for parsing the directory listings returned by
 * the server.
 * Unfortunately, RFC 959 did not specify the format of directory listings, so
 * each server uses its own format. In addition to that, in most cases the
 * listings were not designed to be machine-parsable; they were meant to be
 * human-readable by users of that particular server.
 * By far the most common format is the one returned by the Unix "ls -l"
 * command. However, legacy systems are still in place, especially in big
 * companies. These often use very exotic listing styles.
 * Another problem is localized listings containing date strings. In some
 * cases these listings are ambiguous and cannot be distinguished.
 * Example of an ambiguous date: 04-05-06. All of the 6 permutations for
 * the location of year, month and day are valid dates.
 * Some servers send multiline listings where a single entry can span two
 * lines; this has to be detected as well, as far as possible.
 *
 * Some servers send MVS style listings which can consist of just the
 * filename without any additional data. In order to prevent problems, this
 * format is only parsed if the server is in fact recognized as an MVS server.
 *
 * Please see tests/dirparsertest.cpp for a list of supported formats and the
 * expected parser result.
 *
 * When data is added to the parser, it is first decomposed into lines,
 * which are then processed further. Each line gets consecutively tested for
 * different formats, starting with the most common Unix style format.
 * Lines not containing a recognized format (e.g. a part of a multiline
 * entry) are remembered and, if the next line cannot be parsed either, they
 * get concatenated to be parsed again (and discarded if not recognized).
 */
34
// Forward declarations: these types are only used by pointer or reference in
// this header, so their full definitions are not required here.
class CLine;
class CToken;
class CControlSocket;
38
// Encoding tag for a raw directory listing. The enum is deliberately left
// unscoped inside a namespace so that callers can write
// listingEncoding::unknown / listingEncoding::type.
namespace listingEncoding
{
        enum type
        {
                unknown, // default passed to the parser constructor
                normal,
                ebcdic
        };
}
48
49
50class CDirectoryListingParser final
51{
52public:
53        CDirectoryListingParser(CControlSocket* pControlSocket, const CServer& server, listingEncoding::type encoding = listingEncoding::unknown, bool sftp_mode = false);
54        ~CDirectoryListingParser();
55
56        CDirectoryListingParser(CDirectoryListingParser const&) = delete;
57        CDirectoryListingParser& operator=(CDirectoryListingParser const&) = delete;
58
59        CDirectoryListing Parse(const CServerPath &path);
60
61        bool AddData(char *pData, int len);
62        bool AddLine(const wchar_t* pLine);
63
64        void Reset();
65
66        void SetTimezoneOffset(fz::duration const& span) { m_timezoneOffset = span; }
67
68        void SetServer(const CServer& server) { m_server = server; };
69
70protected:
71        CLine *GetLine(bool breakAtEnd, bool& error);
72
73        bool ParseData(bool partial);
74
75        bool ParseLine(CLine &line, const enum ServerType serverType, bool concatenated);
76
77        bool ParseAsUnix(CLine &line, CDirentry &entry, bool expect_date);
78        bool ParseAsDos(CLine &line, CDirentry &entry);
79        bool ParseAsEplf(CLine &line, CDirentry &entry);
80        bool ParseAsVms(CLine &line, CDirentry &entry);
81        bool ParseAsIbm(CLine &line, CDirentry &entry);
82        bool ParseOther(CLine &line, CDirentry &entry);
83        bool ParseAsWfFtp(CLine &line, CDirentry &entry);
84        bool ParseAsIBM_MVS(CLine &line, CDirentry &entry);
85        bool ParseAsIBM_MVS_PDS(CLine &line, CDirentry &entry);
86        bool ParseAsIBM_MVS_PDS2(CLine &line, CDirentry &entry);
87        bool ParseAsIBM_MVS_Migrated(CLine &line, CDirentry &entry);
88        bool ParseAsIBM_MVS_Tape(CLine &line, CDirentry &entry);
89        int ParseAsMlsd(CLine &line, CDirentry &entry);
90        bool ParseAsOS9(CLine &line, CDirentry &entry);
91
92        // Only call this if servertype set to ZVM since it conflicts
93        // with other formats.
94        bool ParseAsZVM(CLine &line, CDirentry &entry);
95
96        // Only call this if servertype set to HPNONSTOP since it conflicts
97        // with other formats.
98        bool ParseAsHPNonstop(CLine &line, CDirentry &entry);
99
100        // Date / time parsers
101        bool ParseUnixDateTime(CLine &line, int &index, CDirentry &entry);
102        bool ParseShortDate(CToken &token, CDirentry &entry, bool saneFieldOrder = false);
103        bool ParseTime(CToken &token, CDirentry &entry);
104
105        // Parse file sizes given like this: 123.4M
106        bool ParseComplexFileSize(CToken& token, int64_t& size, int blocksize = -1);
107
108        bool GetMonthFromName(std::wstring const& name, int &month);
109
110        void DeduceEncoding();
111        void ConvertEncoding(char *pData, int len);
112
113        CControlSocket* m_pControlSocket;
114
115        static std::map<std::wstring, int> m_MonthNamesMap;
116
117        struct t_list
118        {
119                t_list() = default;
120                t_list(char* s, int l)
121                        : p(s), len(l)
122                {}
123
124                char *p;
125                int len;
126        };
127
128        int m_currentOffset;
129
130        std::deque<t_list> m_DataList;
131        std::deque<CRefcountObject<CDirentry>> m_entryList;
132        int64_t m_totalData;
133
134        CLine *m_prevLine;
135
136        CServer m_server;
137
138        bool m_fileListOnly;
139        std::vector<wxString> m_fileList;
140
141        bool m_maybeMultilineVms;
142
143        fz::duration m_timezoneOffset;
144
145        listingEncoding::type m_listingEncoding;
146
147        bool sftp_mode_{};
148};
149
150#endif
Note: See TracBrowser for help on using the repository browser.