gps303/ocid_dload.py

   1 import csv
   2 from logging import getLogger
   3 import requests
   4 from sqlite3 import connect
   5 from zlib import decompressobj, MAX_WBITS
   6
   7 from . import common
   8
   9 log = getLogger("gps303/ocid_dload")
  10
  11 RURL = (
  12     "https://opencellid.org/ocid/downloads"
  13     "?token={token}&type={type}&file={mcc}.csv.gz"
  14 )
  15
  16 SCHEMA = """create table if not exists cells (
  17   "radio" text,
  18   "mcc" int,
  19   "net" int,
  20   "area" int,
  21   "cell" int,
  22   "unit" int,
  23   "lon" int,
  24   "lat" int,
  25   "range" int,
  26   "samples" int,
  27   "changeable" int,
  28   "created" int,
  29   "updated" int,
  30   "averageSignal" int
  31 )"""
  32 DBINDEX = "create index if not exists cell_idx on cells (area, cell)"
  33
  34
  35 class unzipped:
  36     """
  37     File-like object that unzips http response body.
  38     read(size) method returns chunks of binary data as bytes
  39     When used as iterator, splits data to lines
  40     and yelds them as strings.
  41     """
  42
  43     def __init__(self, zstream):
  44         self.zstream = zstream
  45         self.decoder = decompressobj(16 + MAX_WBITS)
  46         self.outdata = b""
  47         self.line = b""
  48
  49     def read(self, n=None):
  50         if self.decoder is None:
  51             return b""
  52         while len(self.outdata) < n:
  53             raw_data = self.zstream.read(n)
  54             self.outdata += self.decoder.decompress(raw_data)
  55             if not raw_data:
  56                 self.decoder = None
  57                 break
  58         if self.outdata:
  59             data, self.outdata = self.outdata[:n], self.outdata[n:]
  60             return data
  61         return b""
  62
  63     def __next__(self):
  64         while True:
  65             splittry = self.line.split(b"\n", maxsplit=1)
  66             if len(splittry) > 1:
  67                 break
  68             moredata = self.read(256)
  69             if not moredata:
  70                 raise StopIteration
  71             self.line += moredata
  72         line, rest = splittry
  73         self.line = rest
  74         return line.decode("utf-8")
  75
  76     def __iter__(self):
  77         return self
  78
  79
  80 def main(conf):
  81     try:
  82         with open(
  83             conf.get("opencellid", "downloadtoken"), encoding="ascii"
  84         ) as fl:
  85             token = fl.read().strip()
  86     except FileNotFoundError:
  87         log.warning("Opencellid access token not configured, cannot download")
  88         return
  89
  90     mcc = conf.get("opencellid", "downloadmcc")
  91     url = RURL.format(token=token, type="mcc", mcc=mcc)
  92     # url = "http://localhost:8000/262.csv.gz"  # TESTING
  93     dbfn = conf.get("opencellid", "dbfn")
  94     count = 0
  95     with requests.get(url, stream=True) as resp, connect(dbfn) as db:
  96         log.debug("Requested %s, result %s", url, resp)
  97         if resp.status_code != 200:
  98             log.error("Error getting %s: %s", url, resp)
  99             return
 100         db.execute("pragma journal_mode = wal")
 101         db.execute(SCHEMA)
 102         db.execute("delete from cells")
 103         rows = csv.reader(unzipped(resp.raw))
 104         for row in rows:
 105             db.execute(
 106                 """insert into cells
 107                    values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
 108                 row,
 109             )
 110             count += 1
 111         db.execute(DBINDEX)
 112     log.info("repopulated %s with %d records for MCC %s", dbfn, count, mcc)
 113
 114
 115 if __name__.endswith("__main__"):
 116     main(common.init(log))