bit7z 4.0.9
A C++ library for interfacing with the 7-zip shared libs.
Loading...
Searching...
No Matches
bitextractor.hpp
1/*
2 * bit7z - A C++ static library to interface with the 7-zip shared libraries.
3 * Copyright (c) 2014-2023 Riccardo Ostani - All Rights Reserved.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at https://mozilla.org/MPL/2.0/.
8 */
9
10#ifndef BITEXTRACTOR_HPP
11#define BITEXTRACTOR_HPP
12
13#include <algorithm>
14
15#include "bitabstractarchiveopener.hpp"
16#include "biterror.hpp"
17#include "bitexception.hpp"
18#include "bitinputarchive.hpp"
19
20namespace bit7z {
21
22namespace filesystem { // NOLINT(modernize-concat-nested-namespaces)
23namespace fsutil {
24auto wildcard_match( const tstring& pattern, const tstring& str ) -> bool;
25} // namespace fsutil
26} // namespace filesystem
27
33template< typename Input >
35 public:
51 explicit BitExtractor( const Bit7zLibrary& lib, const BitInFormat& format BIT7Z_DEFAULT_FORMAT )
53
60 void extract( Input inArchive, const tstring& outDir = {} ) const {
61 BitInputArchive inputArchive( *this, inArchive );
62 inputArchive.extractTo( outDir );
63 }
64
72 void extract( Input inArchive, vector< byte_t >& outBuffer, uint32_t index = 0 ) const {
73 BitInputArchive inputArchive( *this, inArchive );
74 inputArchive.extractTo( outBuffer, index );
75 }
76
84 void extract( Input inArchive, std::ostream& outStream, uint32_t index = 0 ) const {
85 BitInputArchive inputArchive( *this, inArchive );
86 inputArchive.extractTo( outStream, index );
87 }
88
96 void extract( Input inArchive, std::map< tstring, vector< byte_t > >& outMap ) const {
97 BitInputArchive inputArchive( *this, inArchive );
98 inputArchive.extractTo( outMap );
99 }
100
109 void extractMatching( Input inArchive,
110 const tstring& itemFilter,
111 const tstring& outDir = {},
112 FilterPolicy policy = FilterPolicy::Include ) const {
113 using namespace filesystem;
114
115 if ( itemFilter.empty() ) {
116 throw BitException( "Cannot extract items", make_error_code( BitError::FilterNotSpecified ) );
117 }
118
119 extractMatchingFilter( inArchive, outDir, policy, [ &itemFilter ]( const tstring& itemPath ) -> bool {
120 return fsutil::wildcard_match( itemFilter, itemPath );
121 } );
122 }
123
132 void extractMatching( Input inArchive,
133 const tstring& itemFilter,
134 vector< byte_t >& outBuffer,
135 FilterPolicy policy = FilterPolicy::Include ) const {
136 using namespace filesystem;
137
138 if ( itemFilter.empty() ) {
139 throw BitException( "Cannot extract items", make_error_code( BitError::FilterNotSpecified ) );
140 }
141
142 extractMatchingFilter( inArchive, outBuffer, policy,
143 [ &itemFilter ]( const tstring& itemPath ) -> bool {
144 return fsutil::wildcard_match( itemFilter, itemPath );
145 } );
146 }
147
155 void extractItems( Input inArchive,
156 const std::vector< uint32_t >& indices,
157 const tstring& outDir = {} ) const {
158 if ( indices.empty() ) {
159 throw BitException( "Cannot extract items", make_error_code( BitError::IndicesNotSpecified ) );
160 }
161
162 BitInputArchive inputArchive( *this, inArchive );
163 inputArchive.extractTo( outDir, indices );
164 }
165
166#ifdef BIT7Z_REGEX_MATCHING
167
178 void extractMatchingRegex( Input inArchive,
179 const tstring& regex,
180 const tstring& outDir = {},
181 FilterPolicy policy = FilterPolicy::Include ) const {
182 if ( regex.empty() ) {
183 throw BitException( "Cannot extract items", make_error_code( BitError::FilterNotSpecified ) );
184 }
185
186 const tregex regexFilter( regex, tregex::ECMAScript | tregex::optimize );
187 extractMatchingFilter( inArchive, outDir, policy, [ &regexFilter ]( const tstring& itemPath ) -> bool {
188 return std::regex_match( itemPath, regexFilter );
189 } );
190 }
191
202 void extractMatchingRegex( Input inArchive,
203 const tstring& regex,
204 vector< byte_t >& outBuffer,
205 FilterPolicy policy = FilterPolicy::Include ) const {
206 if ( regex.empty() ) {
207 throw BitException( "Cannot extract items", make_error_code( BitError::FilterNotSpecified ) );
208 }
209
210 const tregex regexFilter( regex, tregex::ECMAScript | tregex::optimize );
211 return extractMatchingFilter( inArchive, outBuffer, policy,
212 [ &regexFilter ]( const tstring& itemPath ) -> bool {
213 return std::regex_match( itemPath, regexFilter );
214 } );
215 }
216
217#endif
218
226 void test( Input inArchive ) const {
227 BitInputArchive inputArchive( *this, inArchive );
228 inputArchive.test();
229 }
230
231 private:
232 void extractMatchingFilter( Input inArchive,
233 const tstring& outDir,
234 FilterPolicy policy,
235 const std::function< bool( const tstring& ) >& filter ) const {
236 BitInputArchive inputArchive( *this, inArchive );
237
238 vector< uint32_t > matchedIndices;
239 const bool shouldExtractMatchedItems = policy == FilterPolicy::Include;
240 // Searching for files inside the archive that match the given filter
241 for ( const auto& item : inputArchive ) {
242 const bool itemMatches = filter( item.path() );
243 if ( itemMatches == shouldExtractMatchedItems ) {
244 /* The if-condition is equivalent to an exclusive XNOR (negated XOR) between
245 * itemMatches and shouldExtractMatchedItems.
246 * In other words, it is true only if the current item either:
247 * - matches the filter, and we must include any matching item; or
248 * - doesn't match the filter, and we must exclude those that match. */
249 matchedIndices.push_back( item.index() );
250 }
251 }
252
253 if ( matchedIndices.empty() ) {
254 throw BitException( "Cannot extract items", make_error_code( BitError::NoMatchingItems ) );
255 }
256
257 inputArchive.extractTo( outDir, matchedIndices );
258 }
259
260 void extractMatchingFilter( Input inArchive,
261 vector< byte_t >& outBuffer,
262 FilterPolicy policy,
263 const std::function< bool( const tstring& ) >& filter ) const {
264 BitInputArchive inputArchive( *this, inArchive );
265
266 const bool shouldExtractMatchedItem = policy == FilterPolicy::Include;
267 // Searching for files inside the archive that match the given filter
268 for ( const auto& item : inputArchive ) {
269 const bool itemMatches = filter( item.path() );
270 if ( itemMatches == shouldExtractMatchedItem ) {
271 /* The if-condition is equivalent to an exclusive NOR (negated XOR) between
272 * itemMatches and shouldExtractMatchedItem. */
273 inputArchive.extractTo( outBuffer, item.index() );
274 return;
275 }
276 }
277
278 throw BitException( "Failed to extract items", make_error_code( BitError::NoMatchingItems ) );
279 }
280};
281
282} // namespace bit7z
283
284#endif //BITEXTRACTOR_HPP
The Bit7zLibrary class allows accessing the basic functionalities provided by the 7z DLLs.
Definition bit7zlibrary.hpp:56
The BitAbstractArchiveOpener abstract class represents a generic archive opener.
Definition bitabstractarchiveopener.hpp:26
auto format() const noexcept -> const BitInFormat &override
The BitException class represents a generic exception thrown from the bit7z classes.
Definition bitexception.hpp:32
The BitExtractor template class allows extracting the content of archives from supported input types.
Definition bitextractor.hpp:34
void extract(Input inArchive, vector< byte_t > &outBuffer, uint32_t index=0) const
Extracts a file from the given archive to the output buffer.
Definition bitextractor.hpp:72
void test(Input inArchive) const
Tests the given archive without extracting its content.
Definition bitextractor.hpp:226
BitExtractor(const Bit7zLibrary &lib, const BitInFormat &format=BitFormat::Auto)
Constructs a BitExtractor object.
Definition bitextractor.hpp:51
void extractMatchingRegex(Input inArchive, const tstring &regex, vector< byte_t > &outBuffer, FilterPolicy policy=FilterPolicy::Include) const
Extracts the first file in the archive that matches the given regex pattern to the output buffer.
Definition bitextractor.hpp:202
void extractMatching(Input inArchive, const tstring &itemFilter, const tstring &outDir={}, FilterPolicy policy=FilterPolicy::Include) const
Extracts the files in the archive that match the given wildcard pattern to the chosen directory.
Definition bitextractor.hpp:109
void extract(Input inArchive, const tstring &outDir={}) const
Extracts the given archive to the chosen directory.
Definition bitextractor.hpp:60
void extractMatching(Input inArchive, const tstring &itemFilter, vector< byte_t > &outBuffer, FilterPolicy policy=FilterPolicy::Include) const
Extracts to the output buffer the first file in the archive matching the given wildcard pattern.
Definition bitextractor.hpp:132
void extract(Input inArchive, std::ostream &outStream, uint32_t index=0) const
Extracts a file from the given archive to the output stream.
Definition bitextractor.hpp:84
void extractMatchingRegex(Input inArchive, const tstring &regex, const tstring &outDir={}, FilterPolicy policy=FilterPolicy::Include) const
Extracts the files in the archive that match the given regex pattern to the chosen directory.
Definition bitextractor.hpp:178
void extractItems(Input inArchive, const std::vector< uint32_t > &indices, const tstring &outDir={}) const
Extracts the specified items from the given archive to the chosen directory.
Definition bitextractor.hpp:155
void extract(Input inArchive, std::map< tstring, vector< byte_t > > &outMap) const
Extracts the content of the given archive into a map of memory buffers, where the keys are the paths of the files inside the archive, and the values are their extracted contents.
Definition bitextractor.hpp:96
The BitInFormat class specifies an extractable archive format.
Definition bitformat.hpp:58
The BitInputArchive class, given a handler object, allows reading/extracting the content of archives.
Definition bitinputarchive.hpp:40
void test() const
Tests the archive without extracting its content.
void extractTo(const tstring &outDir) const
Extracts the archive to the chosen directory.
The main namespace of the bit7z library.
Definition bit7zlibrary.hpp:30
FilterPolicy
Enumeration representing the policy according to which the archive handler should treat the items that match the pattern.
Definition bitabstractarchivehandler.hpp:66
@ Include
Extract/compress the items that match the pattern.
std::basic_regex< tchar > tregex
Definition bittypes.hpp:98
std::basic_string< tchar > tstring
Definition bittypes.hpp:91