def gen_indices_and_paths(self):
    """
    Yield an ``(index values, path)`` pair for each data row of the index.

    The index is opened with ``self.encoding`` and parsed as CSV. The first
    row is treated as a header and discarded. For every following row, the
    last column is yielded as the path and all preceding columns are
    yielded as the index tuple.

    An empty index file yields nothing, and blank rows are skipped.

    :yields: ``(tuple of str, str)`` -- index values and path for each row
    """
    with self.index.open(encoding=self.encoding) as inf:
        reader = csv.reader(inf)
        # Skip the header. A bare next() would raise StopIteration on an
        # empty file, which PEP 479 turns into RuntimeError in a generator.
        next(reader, None)
        for row in reader:
            if not row:
                # csv.reader returns [] for blank lines; row[-1] would
                # raise IndexError on them.
                continue
            yield tuple(row[:-1]), row[-1]