@@ -180,6 +180,17 @@ def saveSubset(filename, idx):
180180 return len (testing ), len (training )
181181
182182def main (args ):
# Load the optional blacklist of datasets to skip.
# The file is JSON: an array whose entries are sequences of path components
# that get joined with "/" into a lookup key. Further down in main these keys
# are compared against "userId/placeId/placeId/screenId" strings.
# When --blacklist is not given the set stays empty and nothing is skipped.
blacklisted = set()
if args.blacklist is not None:
    with open(args.blacklist, 'r') as f:
        entries = json.load(f)
    blacklisted = {
        # An entry that is already a single string is used as-is; joining it
        # would interleave "/" between its characters and never match.
        item if isinstance(item, str) else '/'.join(item)
        for item in entries
    }
183194 stats = {
184195 'placeId' : [],
185196 'userId' : [],
@@ -204,6 +215,13 @@ def main(args):
204215 if not (sid in stats ['screenId' ]):
205216 stats ['screenId' ].append (sid )
206217 path = os .path .join (folder , placeId , userId , screenId )
218+ # check if the dataset is blacklisted
219+ # placeId twice since real screenId is "placeId/screenId"
220+ uuid = '/' .join ([userId , placeId , placeId , screenId ])
221+ print (uuid )
222+ if uuid in blacklisted :
223+ print ('Skipping blacklisted dataset:' , path )
224+ continue
207225 testFramesN , trainFramesN = processFolder (
208226 path ,
209227 args .time_delta , args .test_ratio , args .frames_per_chunk ,
@@ -246,6 +264,10 @@ def main(args):
246264 '--min-frames' , type = int , default = 0 ,
247265 help = 'Minimum number of frames in a chunk'
248266 )
# Optional blacklist file; args.blacklist stays None when the flag is omitted.
parser.add_argument('--blacklist', default=None, type=str,
                    help='Path to the blacklist file')
249271
250272 args = parser .parse_args ()
251273 main (args )
0 commit comments