@@ -43,63 +43,63 @@ Job<C>::Job() : detail::JobBase{C::name()} {}
43
43
// expects, then invoke the function with those types.
44
44
45
45
template <typename C>
46
- void Job<C>::init(const std::filesystem::path& root ) const {
46
+ void Job<C>::init(detail::ISource& source ) const {
47
47
using namespace detail ;
48
48
using Args = typename Params<decltype (C::init)>::type;
49
49
50
- // For each expected input, load the file , and deserialize it into
50
+ // For each expected input, load it , and deserialize it into
51
51
// the appropriate type. Return the types as a tuple, which can then
52
52
// std::apply to C::init, passing the inputs.
53
+ size_t DEBUG_ONLY nextIdx = 0 ;
53
54
std::apply (
54
55
C::init,
55
56
typesToValues<Args>(
56
57
[&] (size_t idx, auto tag) {
57
- return deserialize<typename decltype (tag)::Type>(
58
- root / folly::to<std::string>(idx)
59
- );
58
+ assertx (idx == nextIdx++);
59
+ return deserialize<typename decltype (tag)::Type>(source);
60
60
}
61
61
)
62
62
);
63
+ source.initDone ();
63
64
64
65
using Ret = typename Return<decltype (C::init)>::type;
65
66
static_assert (std::is_void_v<Ret>, " init() must return void" );
66
67
}
67
68
68
69
template <typename C>
69
- void Job<C>::fini(const std::filesystem::path& outputRoot ) const {
70
+ void Job<C>::fini(detail::ISink& sink ) const {
70
71
using namespace detail ;
71
72
73
+ sink.startFini ();
72
74
using Ret = typename Return<decltype (C::fini)>::type;
73
75
if constexpr (std::is_void_v<Ret>) {
74
76
C::fini ();
75
77
} else {
76
- auto const output = outputRoot / " fini" ;
77
- std::filesystem::create_directory (output, outputRoot);
78
78
auto const v = C::fini ();
79
- time (" writing fini outputs" , [&] { return serialize (v, 0 , output ); });
79
+ time (" writing fini outputs" , [&] { return serialize (v, sink ); });
80
80
}
81
81
}
82
82
83
83
template <typename C>
84
- void Job<C>::run(const std::filesystem::path& inputRoot,
85
- const std::filesystem::path& outputRoot) const {
84
+ void Job<C>::run(detail::ISource& source, detail::ISink& sink) const {
86
85
using namespace detail ;
87
86
88
- // For each expected input, load the file , and deserialize it into
89
- // the appropriate type, turning all of the types into a tuple.
87
+ // For each expected input, load it , and deserialize it into the
88
+ // appropriate type, turning all of the types into a tuple.
90
89
using Args = typename Params<decltype (C::run)>::type;
90
+ size_t DEBUG_ONLY nextIdx = 0 ;
91
91
auto inputs = time (
92
92
" loading inputs" ,
93
93
[&] {
94
94
return typesToValues<Args>(
95
95
[&] (size_t idx, auto tag) {
96
- return deserialize<typename decltype (tag)::Type>(
97
- inputRoot / folly::to<std::string>(idx)
98
- );
96
+ assertx (idx == nextIdx++);
97
+ return deserialize<typename decltype (tag)::Type>(source);
99
98
}
100
99
);
101
100
}
102
101
);
102
+ source.nextInput ();
103
103
104
104
// Apply the tuple to C::run, passing the types as parameters.
105
105
auto outputs = time (
@@ -110,8 +110,9 @@ void Job<C>::run(const std::filesystem::path& inputRoot,
110
110
using Ret = typename Return<decltype (C::run)>::type;
111
111
static_assert (!std::is_void_v<Ret>, " run() must return something" );
112
112
113
- // Serialize the outputs into the output directory.
114
- time (" writing outputs" , [&] { return serialize (outputs, 0 , outputRoot); });
113
+ // Serialize the outputs
114
+ time (" writing outputs" , [&] { return serialize (outputs, sink); });
115
+ sink.nextOutput ();
115
116
}
116
117
117
118
// ////////////////////////////////////////////////////////////////////
@@ -120,81 +121,99 @@ namespace detail {
120
121
121
122
// ////////////////////////////////////////////////////////////////////
122
123
123
- // Given a file path, load the contents of the file, deserialize them
124
- // into the type T, and return it.
124
+ // Turn a blob into a specific (non-marker) type
125
125
template <typename T>
126
- T JobBase::deserialize ( const std::filesystem::path& path ) {
126
+ T JobBase::deserializeBlob ( std::string blob ) {
127
127
using namespace detail ;
128
+ static_assert (!IsMarker<T>::value, " Special markers cannot be nested" );
128
129
if constexpr (std::is_same<T, std::string>::value) {
129
130
// A std::string is always stored as itself (this lets us store
130
- // files as their contents without having to encode them).
131
- return readFile (path);
132
- } else if constexpr (IsVariadic<T>::value) {
131
+ // files directly as their contents without having to encode
132
+ // them).
133
+ return blob;
134
+ } else {
135
+ // For most types, the data is encoded using BlobEncoder, so undo
136
+ // that.
137
+ BlobDecoder decoder{blob.data (), blob.size ()};
138
+ return decoder.makeWhole <T>();
139
+ }
140
+ }
141
+
142
+ // Deserialize the given input source into the type T and return
143
+ // it. The type might include markers, which might trigger
144
+ // sub-deserializations.
145
+ template <typename T>
146
+ T JobBase::deserialize (ISource& source) {
147
+ using namespace detail ;
148
+ static_assert (!IsMulti<T>::value, " Multi can only be used as return type" );
149
+
150
+ if constexpr (IsVariadic<T>::value) {
133
151
static_assert (!IsMarker<typename T::Type>::value,
134
152
" Special markers cannot be nested" );
135
- // Variadic<T> is actually a directory, not a file. Recurse into
136
- // it, and do the deserialization for every file within it.
153
+ auto const blobs = source.variadic ();
137
154
T out;
138
- for (size_t i = 0 ;; ++i) {
139
- auto const valPath = path / folly::to<std::string>(i);
140
- // A break in the numbering means the end of the vector.
141
- if (!std::filesystem::exists (valPath)) break ;
142
- out.vals .emplace_back (deserialize<typename T::Type>(valPath));
155
+ out.vals .reserve (blobs.size ());
156
+ for (auto const & blob : blobs) {
157
+ out.vals .emplace_back (deserializeBlob<typename T::Type>(blob));
143
158
}
144
159
return out;
145
160
} else if constexpr (IsOpt<T>::value) {
146
161
static_assert (!IsMarker<typename T::Type>::value,
147
162
" Special markers cannot be nested" );
148
- // Opt<T> is like T, except the file may not exist (so is nullopt
163
+ // Opt<T> is like T, except the data may not exist (so is nullopt
149
164
// otherwise).
150
165
T out;
151
- if (std::filesystem::exists (path )) {
152
- out.val .emplace (deserialize <typename T::Type>(path ));
166
+ if (auto const blob = source. optBlob ( )) {
167
+ out.val .emplace (deserializeBlob <typename T::Type>(*blob ));
153
168
}
154
169
return out;
155
170
} else {
156
- // For most types, the data is encoded using BlobEncoder, so undo
157
- // that.
158
- static_assert (!IsMulti<T>::value, " Multi can only be used as return type" );
159
- auto const data = readFile (path);
160
- BlobDecoder decoder{data.data (), data.size ()};
161
- return decoder.makeWhole <T>();
171
+ return deserializeBlob<T>(source.blob ());
162
172
}
163
173
}
164
174
165
- // Given a value, an index of that value (its positive in the output
166
- // values), and an output root, serialize the value, and write its
167
- // contents to the appropriate file.
175
+ // Serialize the given (non-marker) value into a blob
168
176
template <typename T>
169
- void JobBase::serialize (const T& v,
170
- size_t idx,
171
- const std::filesystem::path& root) {
177
+ std::string JobBase::serializeBlob (const T& v) {
172
178
using namespace detail ;
179
+ static_assert (!IsMarker<T>::value,
180
+ " Special markers cannot be nested" );
173
181
if constexpr (std::is_same<T, std::string>::value) {
174
- // std::string isn't serialized, but written as itself as
175
- // root/idx.
176
- return writeFile (root / folly::to<std::string>(idx), v.data (), v.size ());
177
- } else if constexpr (IsVariadic<T>::value) {
178
- // For Variadic<T>, we create a directory root/idx, and under it,
179
- // write a file for every element in the vector.
182
+ // std::string always encodes to itself
183
+ return v;
184
+ } else {
185
+ BlobEncoder encoder;
186
+ encoder (v);
187
+ return std::string{(const char *)encoder.data (), encoder.size ()};
188
+ }
189
+ }
190
+
191
+ // Serialize the given value into a blob and write it out to the given
192
+ // output sink. The value's type might be a marker, which can trigger
193
+ // sub-serializations.
194
+ template <typename T>
195
+ void JobBase::serialize (const T& v, ISink& sink) {
196
+ using namespace detail ;
197
+ if constexpr (IsVariadic<T>::value) {
180
198
static_assert (!IsMarker<typename T::Type>::value,
181
199
" Special markers cannot be nested" );
182
- auto const path = root / folly::to<std::string>(idx) ;
183
- std::filesystem::create_directory (path, root);
184
- for ( size_t i = 0 ; i < v. vals . size ( ); ++i) {
185
- serialize (v. vals [i], i, path );
186
- }
200
+ using namespace folly ::gen ;
201
+ auto const blobs = from (v. vals )
202
+ | map ([&] ( const typename T::Type& t) { return serializeBlob (t ); })
203
+ | as<std::vector>( );
204
+ sink. variadic (blobs);
187
205
} else if constexpr (IsOpt<T>::value) {
188
206
// Opt<T> is like T, except nothing is written if the value isn't
189
207
// present.
190
208
static_assert (!IsMarker<typename T::Type>::value,
191
209
" Special markers cannot be nested" );
192
- if (!v.val .has_value ()) return ;
193
- serialize (*v.val , idx, root);
210
+ sink.optBlob (
211
+ v.val .has_value () ? serializeBlob (*v.val ) : Optional<std::string>{}
212
+ );
194
213
} else if constexpr (IsMulti<T>::value) {
195
214
// Treat Multi as equivalent to std::tuple (IE, write each element
196
- // to a separate file ).
197
- assertx (idx == 0 ) ;
215
+ // separately ).
216
+ size_t DEBUG_ONLY nextTupleIdx = 0 ;
198
217
for_each (
199
218
v.vals ,
200
219
[&] (auto const & elem, size_t tupleIdx) {
@@ -204,18 +223,12 @@ void JobBase::serialize(const T& v,
204
223
>::value,
205
224
" Multi cannot be nested"
206
225
);
207
- serialize (elem, tupleIdx, root);
226
+ assertx (tupleIdx == nextTupleIdx++);
227
+ serialize (elem, sink);
208
228
}
209
229
);
210
230
} else {
211
- // Most types are just encoded with BlobEncoder and written as
212
- // root/idx
213
- BlobEncoder encoder;
214
- encoder (v);
215
- writeFile (
216
- root / folly::to<std::string>(idx),
217
- (const char *)encoder.data (), encoder.size ()
218
- );
231
+ sink.blob (serializeBlob (v));
219
232
}
220
233
}
221
234
@@ -225,24 +238,35 @@ void JobBase::serialize(const T& v,
225
238
226
239
// ////////////////////////////////////////////////////////////////////
227
240
228
- inline RefId::RefId (std::string id, size_t size)
229
- : m_id{std::move (id)}, m_size{size}
241
+ inline RefId::RefId (std::string id, size_t size, size_t extra )
242
+ : m_id{std::move (id)}, m_size{size}, m_extra{extra}
230
243
{}
231
244
232
245
inline bool RefId::operator ==(const RefId& o) const {
233
- return std::tie (m_id, m_size) == std::tie (o.m_id , o.m_size );
246
+ return
247
+ std::tie (m_id, m_extra, m_size) ==
248
+ std::tie (o.m_id , o.m_extra , o.m_size );
234
249
}
235
250
236
251
inline bool RefId::operator !=(const RefId& o) const {
237
252
return !(*this == o);
238
253
}
239
254
240
255
inline bool RefId::operator <(const RefId& o) const {
241
- return std::tie (m_size, m_id) < std::tie (o.m_size , o.m_id );
256
+ return
257
+ std::tie (m_id, m_extra, m_size) <
258
+ std::tie (o.m_id , o.m_extra , o.m_size );
242
259
}
243
260
244
261
inline std::string RefId::toString () const {
245
- return folly::sformat (" {}:{}" , m_id, m_size);
262
+ // Don't print out the extra field if it's zero, to avoid clutter
263
+ // for implementations which don't use it. The id might contain
264
+ // binary data, so escape it before printing.
265
+ if (m_extra) {
266
+ return folly::sformat (" {}:{}:{}" , folly::humanify (m_id), m_extra, m_size);
267
+ } else {
268
+ return folly::sformat (" {}:{}" , folly::humanify (m_id), m_size);
269
+ }
246
270
}
247
271
248
272
// ////////////////////////////////////////////////////////////////////
0 commit comments