From 1136a1740e9191e93460577d3f60cd6eb165cfab Mon Sep 17 00:00:00 2001 From: Michael Gasvoda Date: Thu, 28 Jun 2018 13:28:46 -0400 Subject: [PATCH 1/3] fixing windows path bug --- quantgov/corpus/structures.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/quantgov/corpus/structures.py b/quantgov/corpus/structures.py index 46cdd15..6a5445a 100644 --- a/quantgov/corpus/structures.py +++ b/quantgov/corpus/structures.py @@ -286,6 +286,13 @@ def __init__(self, index, bucket, encoding='utf-8', cache=True): super(IndexDriver, self).__init__( index_labels=index_labels, encoding=encoding, cache=cache) + def gen_indices_and_paths(self): + with self.index.open() as inf: + reader = csv.reader(inf) + next(reader) + for row in reader: + yield tuple(row[:-1]), row[-1] + def read(self, docinfo): idx, path = docinfo body = self.client.get_object(Bucket=self.bucket, From def564b81f3c094e31562c3b40ee1c33ed8ebce3 Mon Sep 17 00:00:00 2001 From: Michael Gasvoda Date: Thu, 28 Jun 2018 13:30:09 -0400 Subject: [PATCH 2/3] Version Bump --- quantgov/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/quantgov/__init__.py b/quantgov/__init__.py index 2da9b57..c699782 100644 --- a/quantgov/__init__.py +++ b/quantgov/__init__.py @@ -13,4 +13,4 @@ from .utils import load_driver -__version__ = '0.4.1' +__version__ = '0.4.2' From f0284001cd1745c31c8c492c2521aa4b90227eca Mon Sep 17 00:00:00 2001 From: Michael Gasvoda Date: Thu, 28 Jun 2018 13:34:15 -0400 Subject: [PATCH 3/3] applying user specified encoding --- quantgov/corpus/structures.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/quantgov/corpus/structures.py b/quantgov/corpus/structures.py index 6a5445a..08e325e 100644 --- a/quantgov/corpus/structures.py +++ b/quantgov/corpus/structures.py @@ -287,7 +287,7 @@ def __init__(self, index, bucket, encoding='utf-8', cache=True): index_labels=index_labels, encoding=encoding, cache=cache) def gen_indices_and_paths(self): - with self.index.open() as inf: + with self.index.open(encoding=self.encoding) as inf: reader = csv.reader(inf) next(reader) for row in reader: