bit7z 4.0.0
A C++ library for interfacing with the 7-zip shared libs.
Loading...
Searching...
No Matches
bitextractor.hpp
1/*
2 * bit7z - A C++ static library to interface with the 7-zip shared libraries.
3 * Copyright (c) 2014-2023 Riccardo Ostani - All Rights Reserved.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at https://mozilla.org/MPL/2.0/.
8 */
9
10#ifndef BITEXTRACTOR_HPP
11#define BITEXTRACTOR_HPP
12
13#include <algorithm>
14
15#include "bitabstractarchiveopener.hpp"
16#include "biterror.hpp"
17#include "bitexception.hpp"
18#include "bitinputarchive.hpp"
19
20namespace bit7z {
21
// Forward declaration only: this header declares wildcard_match but does not define it.
// The wildcard-based extractMatching overloads of BitExtractor call it as
// fsutil::wildcard_match( itemFilter, itemPath ) and use the boolean result as their item filter.
22namespace filesystem { // NOLINT(modernize-concat-nested-namespaces)
23namespace fsutil {
// Returns a bool telling whether `str` matches `pattern` (exact wildcard syntax is not visible here).
24auto wildcard_match( const tstring& pattern, const tstring& str ) -> bool;
25} // namespace fsutil
26} // namespace filesystem
27
33template< typename Input >
35 public:
51 explicit BitExtractor( const Bit7zLibrary& lib, const BitInFormat& format BIT7Z_DEFAULT_FORMAT )
53
60 void extract( Input inArchive, const tstring& outDir = {} ) const {
61 BitInputArchive inputArchive( *this, inArchive );
62 inputArchive.extractTo( outDir );
63 }
64
72 void extract( Input inArchive, vector< byte_t >& outBuffer, uint32_t index = 0 ) const {
73 BitInputArchive inputArchive( *this, inArchive );
74 inputArchive.extractTo( outBuffer, index );
75 }
76
84 void extract( Input inArchive, std::ostream& outStream, uint32_t index = 0 ) const {
85 BitInputArchive inputArchive( *this, inArchive );
86 inputArchive.extractTo( outStream, index );
87 }
88
96 void extract( Input inArchive, std::map< tstring, vector< byte_t > >& outMap ) const {
97 BitInputArchive inputArchive( *this, inArchive );
98 inputArchive.extractTo( outMap );
99 }
100
109 void extractMatching( Input inArchive,
110 const tstring& itemFilter,
111 const tstring& outDir = {},
112 FilterPolicy policy = FilterPolicy::Include ) const {
113 using namespace filesystem;
114
115 if ( itemFilter.empty() ) {
116 throw BitException( "Cannot extract items", make_error_code( BitError::FilterNotSpecified ) );
117 }
118
119 extractMatchingFilter( inArchive, outDir, policy, [ &itemFilter ]( const tstring& itemPath ) -> bool {
120 return fsutil::wildcard_match( itemFilter, itemPath );
121 } );
122 }
123
132 void extractMatching( Input inArchive,
133 const tstring& itemFilter,
134 vector< byte_t >& outBuffer,
135 FilterPolicy policy = FilterPolicy::Include ) const {
136 using namespace filesystem;
137
138 if ( itemFilter.empty() ) {
139 throw BitException( "Cannot extract items", make_error_code( BitError::FilterNotSpecified ) );
140 }
141
142 extractMatchingFilter( inArchive, outBuffer, policy,
143 [ &itemFilter ]( const tstring& itemPath ) -> bool {
144 return fsutil::wildcard_match( itemFilter, itemPath );
145 } );
146 }
147
155 void extractItems( Input inArchive,
156 const std::vector< uint32_t >& indices,
157 const tstring& outDir = {} ) const {
158 if ( indices.empty() ) {
159 throw BitException( "Cannot extract items", make_error_code( BitError::IndicesNotSpecified ) );
160 }
161
162 BitInputArchive inputArchive( *this, inArchive );
163 uint32_t nItems = inputArchive.itemsCount();
164 // Find if any index passed by the user is not in the valid range [0, itemsCount() - 1]
165 const auto findRes = std::find_if( indices.cbegin(),
166 indices.cend(),
167 [ &nItems ]( uint32_t index ) -> bool {
168 return index >= nItems;
169 } );
170 if ( findRes != indices.cend() ) {
171 throw BitException( "Cannot extract item at the index " + std::to_string( *findRes ),
172 make_error_code( BitError::InvalidIndex ) );
173 }
174
175 inputArchive.extractTo( outDir, indices );
176 }
177
178#ifdef BIT7Z_REGEX_MATCHING
179
190 void extractMatchingRegex( Input inArchive,
191 const tstring& regex,
192 const tstring& outDir = {},
193 FilterPolicy policy = FilterPolicy::Include ) const {
194 if ( regex.empty() ) {
195 throw BitException( "Cannot extract items", make_error_code( BitError::FilterNotSpecified ) );
196 }
197
198 const tregex regexFilter( regex, tregex::ECMAScript | tregex::optimize );
199 extractMatchingFilter( inArchive, outDir, policy, [ &regexFilter ]( const tstring& itemPath ) -> bool {
200 return std::regex_match( itemPath, regexFilter );
201 } );
202 }
203
214 void extractMatchingRegex( Input inArchive,
215 const tstring& regex,
216 vector< byte_t >& outBuffer,
217 FilterPolicy policy = FilterPolicy::Include ) const {
218 if ( regex.empty() ) {
219 throw BitException( "Cannot extract items", make_error_code( BitError::FilterNotSpecified ) );
220 }
221
222 const tregex regexFilter( regex, tregex::ECMAScript | tregex::optimize );
223 return extractMatchingFilter( inArchive, outBuffer, policy,
224 [ &regexFilter ]( const tstring& itemPath ) -> bool {
225 return std::regex_match( itemPath, regexFilter );
226 } );
227 }
228
229#endif
230
238 void test( Input inArchive ) const {
239 BitInputArchive inputArchive( *this, inArchive );
240 inputArchive.test();
241 }
242
243 private:
244 void extractMatchingFilter( Input inArchive,
245 const tstring& outDir,
246 FilterPolicy policy,
247 const std::function< bool( const tstring& ) >& filter ) const {
248 BitInputArchive inputArchive( *this, inArchive );
249
250 vector< uint32_t > matchedIndices;
251 const bool shouldExtractMatchedItems = policy == FilterPolicy::Include;
252 // Searching for files inside the archive that match the given filter
253 for ( const auto& item : inputArchive ) {
254 const bool itemMatches = filter( item.path() );
255 if ( itemMatches == shouldExtractMatchedItems ) {
256 /* The if-condition is equivalent to an exclusive XNOR (negated XOR) between
257 * itemMatches and shouldExtractMatchedItems.
258 * In other words, it is true only if the current item either:
259 * - matches the filter, and we must include any matching item; or
260 * - doesn't match the filter, and we must exclude those that match. */
261 matchedIndices.push_back( item.index() );
262 }
263 }
264
265 if ( matchedIndices.empty() ) {
266 throw BitException( "Cannot extract items", make_error_code( BitError::NoMatchingItems ) );
267 }
268
269 inputArchive.extractTo( outDir, matchedIndices );
270 }
271
272 void extractMatchingFilter( Input inArchive,
273 vector< byte_t >& outBuffer,
274 FilterPolicy policy,
275 const std::function< bool( const tstring& ) >& filter ) const {
276 BitInputArchive inputArchive( *this, inArchive );
277
278 const bool shouldExtractMatchedItem = policy == FilterPolicy::Include;
279 // Searching for files inside the archive that match the given filter
280 for ( const auto& item : inputArchive ) {
281 const bool itemMatches = filter( item.path() );
282 if ( itemMatches == shouldExtractMatchedItem ) {
283 /* The if-condition is equivalent to an exclusive NOR (negated XOR) between
284 * itemMatches and shouldExtractMatchedItem. */
285 inputArchive.extractTo( outBuffer, item.index() );
286 return;
287 }
288 }
289
290 throw BitException( "Failed to extract items", make_error_code( BitError::NoMatchingItems ) );
291 }
292};
293
294} // namespace bit7z
295
296#endif //BITEXTRACTOR_HPP
The Bit7zLibrary class allows accessing the basic functionalities provided by the 7z DLLs.
Definition bit7zlibrary.hpp:56
The BitAbstractArchiveOpener abstract class represents a generic archive opener.
Definition bitabstractarchiveopener.hpp:26
auto format() const noexcept -> const BitInFormat &override
The BitException class represents a generic exception thrown from the bit7z classes.
Definition bitexception.hpp:32
The BitExtractor template class allows extracting the content of archives from supported input types.
Definition bitextractor.hpp:34
void extract(Input inArchive, vector< byte_t > &outBuffer, uint32_t index=0) const
Extracts a file from the given archive to the output buffer.
Definition bitextractor.hpp:72
void test(Input inArchive) const
Tests the given archive without extracting its content.
Definition bitextractor.hpp:238
BitExtractor(const Bit7zLibrary &lib, const BitInFormat &format=BitFormat::Auto)
Constructs a BitExtractor object.
Definition bitextractor.hpp:51
void extractMatchingRegex(Input inArchive, const tstring &regex, vector< byte_t > &outBuffer, FilterPolicy policy=FilterPolicy::Include) const
Extracts the first file in the archive that matches the given regex pattern to the output buffer.
Definition bitextractor.hpp:214
void extractMatching(Input inArchive, const tstring &itemFilter, const tstring &outDir={}, FilterPolicy policy=FilterPolicy::Include) const
Extracts the files in the archive that match the given wildcard pattern to the chosen directory.
Definition bitextractor.hpp:109
void extract(Input inArchive, const tstring &outDir={}) const
Extracts the given archive to the chosen directory.
Definition bitextractor.hpp:60
void extractMatching(Input inArchive, const tstring &itemFilter, vector< byte_t > &outBuffer, FilterPolicy policy=FilterPolicy::Include) const
Extracts to the output buffer the first file in the archive matching the given wildcard pattern.
Definition bitextractor.hpp:132
void extract(Input inArchive, std::ostream &outStream, uint32_t index=0) const
Extracts a file from the given archive to the output stream.
Definition bitextractor.hpp:84
void extractMatchingRegex(Input inArchive, const tstring &regex, const tstring &outDir={}, FilterPolicy policy=FilterPolicy::Include) const
Extracts the files in the archive that match the given regex pattern to the chosen directory.
Definition bitextractor.hpp:190
void extractItems(Input inArchive, const std::vector< uint32_t > &indices, const tstring &outDir={}) const
Extracts the specified items from the given archive to the chosen directory.
Definition bitextractor.hpp:155
void extract(Input inArchive, std::map< tstring, vector< byte_t > > &outMap) const
Extracts the content of the given archive into a map of memory buffers, where the keys are the paths of the files (inside the archive), and the values are their decompressed contents.
Definition bitextractor.hpp:96
The BitInFormat class specifies an extractable archive format.
Definition bitformat.hpp:58
The BitInputArchive class, given a handler object, allows reading/extracting the content of archives.
Definition bitinputarchive.hpp:31
void test() const
Tests the archive without extracting its content.
void extractTo(const tstring &outDir, const std::vector< uint32_t > &indices={}) const
Extracts the specified items to the chosen directory.
The main namespace of the bit7z library.
Definition bit7zlibrary.hpp:30
FilterPolicy
Enumeration representing the policy according to which the archive handler should treat the items that match the pattern given by the user.
Definition bitabstractarchivehandler.hpp:66
@ Include
Extract/compress the items that match the pattern.
std::basic_regex< tchar > tregex
Definition bittypes.hpp:97
std::basic_string< tchar > tstring
Definition bittypes.hpp:90