#!/usr/bin/env python3
# -*- coding:utf-8 -*-
__author__ = 'Mu Yang <emfomy@gmail.com>'
__copyright__ = 'Copyright 2017-2018'
import warnings
import collections.abc
from cosmel.util import *
from cosmel.repo.brand import *
[docs]class Product:
"""The product object.
Args:
pid (str): the ID.
brand (:class:`.Brand`): the brand.
name (str): the name.
head (str): the head word.
name_ws (str): the segmented name.
descr_ws (str): the segmented description.
"""
def __init__(self, pid, brand, name, head, name_ws, descr_ws):
self.__pid = pid
self.__brand = brand
self.__name = name
self.__head = head
self.__name_ws = WsWords(name_ws)
self.__descr_ws = WsWords(descr_ws)
try:
self.__head_idx = self.name_ws.txts.index(head)
except:
pass
def __str__(self):
return f'{self.__pid} {self.__brand!s} {self.__name}'
def __repr__(self):
return f'{self.__pid} {self.__brand!r} {self.__name_ws}'
def __hash__(self):
return hash(self.pid)
@property
def brand(self):
""":class:`.Brand`: the brand."""
return self.__brand
@property
def pid(self):
"""str: the ID."""
return self.__pid
@property
def name(self):
"""str: the name (excluding brand)."""
return self.__name
@property
def name_ws(self):
""":class:`.WsWords`: the word-segmented name."""
return self.__name_ws
@property
def descr(self):
"""str: the description."""
return txtstr(self.__descr_ws)
@property
def descr_ws(self):
""":class:`.WsWords`: the word-segmented description."""
return self.__descr_ws
@property
def head(self):
"""str: the head word."""
return self.__head
@property
def head_ws(self):
""":class:`.WsWords`: the word-segmented name."""
return self.__name_ws[self.__head_idx]
@property
def infix_ws(self):
""":class:`.WsWords`: the word-segmented infix."""
return self.__name_ws[:self.__head_idx]
[docs] def infix_ws_(self, with_head=True):
""":class:`.WsWords`: the word-segmented infix (with/without head word)."""
return self.__name_ws[:self.__head_idx+with_head]
@property
def suffix_ws(self):
""":class:`.WsWords`: the word-segmented suffix."""
return self.__name_ws[self.__head_idx+1:]
[docs] def suffix_ws_(self, with_head=True):
""":class:`.WsWords`: the word-segmented suffix (with/without head word)."""
return self.__name_ws[self.__head_idx+(not with_head):]
[docs]class ProductSet(collections.abc.Collection):
"""The set of products.
* Item: the product object (:class:`.Product`).
Args:
repo_root (str): the path to the folder containing data files.
bname_to_brand (:class:`.BName2Brand`): the dictionary maps brand name to brand object.
"""
def __init__(self, repo_root, bname_to_brand):
super().__init__()
self.__data = list()
tag_dict = {}
with open(repo_root+'/product.lex') as fin_lex, open(repo_root+'/product.tag') as fin_tag:
for line_lex, line_tag in zip(fin_lex, fin_tag):
line_lex = line_lex.strip()
line_tag = line_tag.strip()
assert not line_lex == ''
assert not line_tag == ''
tag_dict[line_lex.split('\t')[0]] = line_tag
descr_dict = {}
with open(repo_root+'/product.descr') as fin_descr, open(repo_root+'/product.descr.tag') as fin_tag:
for line_descr, line_tag in zip(fin_descr, fin_tag):
line_descr = line_descr.strip()
line_tag = line_tag.strip()
assert not line_descr == ''
assert not line_tag == ''
descr_dict[line_descr.split('\t')[0]] = line_tag
head_dict = {}
with open(repo_root+'/product.head') as fin_head:
for line in fin_head:
line = line.strip()
assert not line == ''
pid, head = line.split('\t')
head_dict[pid] = head
with open(repo_root+'/product.txt') as fin_txt:
for line in fin_txt:
line = line.strip()
assert not line == ''
pid, bname, name = line.split('\t')
descr_ws = descr_dict.get(pid, '')
self.__data.append(Product(pid, bname_to_brand[bname], name, head_dict[pid], tag_dict[name], descr_ws))
def __contains__(self, item):
return item in self.__data
def __iter__(self):
return iter(self.__data)
def __len__(self):
return len(self.__data)
def __str__(self):
return '\n'.join(map(str, self.__data))
def __repr__(self):
return '\n'.join(map(repr, self.__data))
[docs]class Id2Product(collections.abc.Mapping):
"""The dictionary maps ID to product.
* Key: the product ID. (str).
* Item: the product object (:class:`.Product`).
Args:
product_set (:class:`.ProductSet`): the product set.
"""
def __init__(self, product_set):
super().__init__()
self.__data = dict()
for product in product_set:
assert product.pid not in self.__data
self.__data[product.pid] = product
def __contains__(self, key):
return key in self.__data
def __getitem__(self, key):
return self.__data[key]
def __iter__(self):
return iter(self.__data)
def __len__(self):
return len(self.__data)
[docs]class BrandPName2Product(collections.abc.Mapping):
"""The dictionary maps brand object and product name to product object.
* Key: the tuple of brand object (:class:`.Brand`) and product name (str).
* Item: the product object (:class:`.Product`).
Args:
product_set (:class:`.ProductSet`): the product set.
"""
def __init__(self, product_set):
super().__init__()
self.__data = dict()
for product in product_set:
pair = (product.brand, product.name,)
assert pair not in self.__data
self.__data[pair] = product
def __contains__(self, key):
return self.__keytransform__(key) in self.__data
def __getitem__(self, key):
return self.__data[self.__keytransform__(key)]
def __iter__(self):
return iter(self.__data)
def __len__(self):
return len(self.__data)
def __keytransform__(self, key):
return (key[0], purge_string(key[1]),)
[docs]class BNamePName2Product(collections.abc.Sequence):
"""The dictionary maps brand name and product name to product.
* Key: the tuple of brand name (str) and product name (str).
* Item: the product object (:class:`.Product`).
Args:
brand_pname_to_product (:class:`.BrandPName2Product`): the dictionary maps brand object and product name to product object.
bname_to_brand (:class:`.BName2Brand`): the dictionary maps name and brand.
"""
def __init__(self, brand_pname_to_product, bname_to_brand):
super().__init__()
self.__data = brand_pname_to_product
self.__key = bname_to_brand
def __contains__(self, key):
return self.__keytransform__(key) in self.__data
def __getitem__(self, key):
return self.__data[self.__keytransform__(key)]
def __iter__(self):
return iter(self.__data.values())
def __len__(self):
return len(self.__data)
def __keytransform__(self, key):
return (self.__key[key[0]], purge_string(key[1]),)
[docs]class PName2ProductList(collections.abc.Mapping):
"""The dictionary maps product name to product object list.
* Key: the product name (str).
* Item: :class:`.ReadOnlyList` of product object (:class:`.Product`).
Args:
product_set (:class:`.ProductSet`): the product set.
"""
def __init__(self, product_set):
super().__init__()
self.__data = dict()
data_dict = dict()
for product in product_set:
pair = (product.brand, product.head,)
if product.name not in data_dict:
data_dict[product.name] = [product]
else:
data_dict[product.name] += [product]
for name, product_set in data_dict.items():
self.__data[name] = ReadOnlyList(product_set)
def __contains__(self, key):
return self.__keytransform__(key) in self.__data
def __getitem__(self, key):
return self.__data[self.__keytransform__(key)]
def __iter__(self):
return iter(self.__data)
def __len__(self):
return len(self.__data)
def __keytransform__(self, key):
return purge_string(key)
[docs]class BrandHead2ProductList(collections.abc.Mapping):
"""The dictionary maps brand object and head word to product object list.
* Key: tuple of brand object (:class:`.Brand`) and product head word (str).
* Item: :class:`.ReadOnlyList` of product object (:class:`.Product`).
Args:
product_set (:class:`.ProductSet`): the product set.
"""
def __init__(self, product_set):
super().__init__()
self.__product_set = product_set
self.__data = dict()
self.__by_brand = dict()
self.__by_head = dict()
data_dict = dict()
by_brand_dict = dict()
by_head_dict = dict()
for product in product_set:
pair = (product.brand, product.head,)
if pair not in data_dict:
data_dict[pair] = [product]
else:
data_dict[pair] += [product]
if product.brand not in by_brand_dict:
by_brand_dict[product.brand] = [product]
else:
by_brand_dict[product.brand] += [product]
if product.head not in by_head_dict:
by_head_dict[product.head] = [product]
else:
by_head_dict[product.head] += [product]
for pair, products in data_dict.items():
self.__data[pair] = ReadOnlyList(products)
for brand, products in by_brand_dict.items():
self.__by_brand[brand] = ReadOnlyList(products)
for head, products in by_head_dict.items():
self.__by_head[head] = ReadOnlyList(products)
self.__empty_collection = ReadOnlyList()
def __contains__(self, key):
if key[1] == slice(None):
return key[0] in self.__by_brand
else:
return key in self.__data
def __getitem__(self, key):
if key[0] == slice(None) and key[1] == slice(None):
return ReadOnlyList(self.__product_set)
if key[0] == slice(None):
return self.__by_head.get(key[1], self.__empty_collection)
if key[1] == slice(None):
return self.__by_brand.get(key[0], self.__empty_collection)
return self.__data.get(key, self.__empty_collection)
def __iter__(self):
return iter(self.__data)
def __len__(self):
return len(self.__data)
[docs]class BNameHead2ProductList(collections.abc.Sequence):
"""The dictionary maps brand name and head word to product object list.
* Key: tuple of brand name (str) and product head word (str).
* Item: :class:`.ReadOnlyList` of product object (:class:`.Product`).
Args:
brand_head_to_product_list (:class:`.BrandHead2Productlist`):
the dictionary maps brand object and head word to product object list.
bname_to_brand (:class:`.BName2Brand`):
the dictionary maps name and brand.
"""
def __init__(self, brand_head_to_product_list, bname_to_brand):
super().__init__()
self.__data = brand_head_to_product_list
self.__key = bname_to_brand
def __contains__(self, key):
return self.__keytransform__(key) in self.__data
def __getitem__(self, key):
return self.__data[self.__keytransform__(key)]
def __iter__(self):
return iter(self.__data.values())
def __len__(self):
return len(self.__data)
def __keytransform__(self, key):
return (self.__key[key[0]] if key[0] != slice(None) else slice(None), key[1],)