X-Git-Url: http://git.tdb.fi/?a=blobdiff_plain;f=scrape-item-data.py;h=84def9232b6cf2ddaedc1e3353ed00e17d82f58a;hb=aaa9d76db026c832bfdd936c4069128bde6b7311;hp=3040063b75b5f32c1fac2021c16e2381dc174800;hpb=980a6425ae617f403010b70e170805b755feb98f;p=poefilter.git diff --git a/scrape-item-data.py b/scrape-item-data.py index 3040063..84def92 100755 --- a/scrape-item-data.py +++ b/scrape-item-data.py @@ -3,16 +3,56 @@ import requests import html.parser import codecs +import argparse class Item: def __init__(self, name, kind): self.name = name self.kind = kind self.droplevel = 0 - self.upgrade = None - - def is_upgrade_for(self, other): - return self.kind==other.kind and self.droplevel>other.droplevel + self.mods = {} + self.upgrades = {"attrs":None, "mods":None} + + def is_compatible(self, other): + return self.kind==other.kind + + def compare_mods(self, other): + result = 0 + for n, v in self.mods.items(): + ov = other.mods.get(n, 0) + if vov: + result = 1 + + for on, ov in other.mods.items(): + v = self.mods.get(on, 0) + if vov: + result = 1 + + return result + + def compare_attrs(self, other): + return 0 + + def check_upgrade(self, other): + if not self.is_compatible(other): + return + + mods = self.compare_mods(other) + attrs = self.compare_attrs(other) + + if self.droplevel0 or (mods==0 and attrs>0): + other.upgrades["mods"] = self + if not other.upgrades["attrs"]: + if attrs>0 or (attrs==0 and mods>0): + other.upgrades["attrs"] = self class Armor(Item): def __init__(self, name, kind): @@ -22,7 +62,7 @@ class Armor(Item): self.evasion = 0 self.energy_shield = 0 - def is_upgrade_for(self, other): + def is_compatible(self, other): if (self.armor!=0)!=(other.armor!=0): return False if (self.evasion!=0)!=(other.evasion!=0): @@ -30,7 +70,24 @@ class Armor(Item): if (self.energy_shield!=0)!=(other.energy_shield!=0): return False - return super(Armor, self).is_upgrade_for(other) + return super(Armor, self).is_compatible(other) + + def compare_attrs(self, other): + if self.armorother.armor: + return 1 + if self.evasion>other.evasion: + return 1 + if self.energy_shield>other.energy_shield: + return 1 + + return 0 class Weapon(Item): def __init__(self, name, kind): @@ -39,6 +96,14 @@ class Weapon(Item): self.speed = 0 self.dps = 0 + def compare_attrs(self, other): + if self.dpsother.dps: + return 1 + + return 0 + class ItemDataParser(html.parser.HTMLParser): def __init__(self): super(ItemDataParser, self).__init__() @@ -46,12 +111,14 @@ class ItemDataParser(html.parser.HTMLParser): self.in_items_table = False self.column = 0 self.in_cell = False - self.ignore_row = False - self.items = {} + self.items = [] self.current_item = None self.in_heading = False self.headings = [] self.current_heading = None + self.mod_row = False + self.mod_names = [] + self.mod_index = 0 def handle_starttag(self, tag, attrs): if tag=="table": @@ -59,15 +126,18 @@ class ItemDataParser(html.parser.HTMLParser): if n=="class" and v=="itemDataTable": self.in_items_table = True elif tag=="tr": - self.ignore_row = False + self.mod_row = False for n, v in attrs: if n=="class" and v.endswith("_mod"): - self.ignore_row = True + self.mod_row = True self.column = 0 - self.current_item = None + if not self.mod_row: + self.current_item = None + self.mod_names = [] elif tag=="td": self.column += 1 - self.in_cell = True + if self.in_items_table: + self.in_cell = True elif tag=="h1": self.in_heading = True @@ -77,27 +147,48 @@ class ItemDataParser(html.parser.HTMLParser): elif tag=="td": self.in_cell = False elif tag=="tr": - if self.current_item: - for it in self.items.values(): - if not it.upgrade and self.current_item.is_upgrade_for(it): - it.upgrade = self.current_item - break + if self.current_item and self.mod_row: + for it in self.items: + self.current_item.check_upgrade(it) elif tag=="h1": self.in_heading = False + elif tag=="html": + for i in range(1, len(self.items)): + for j in range(i): + self.items[j].check_upgrade(self.items[i]) def handle_data(self, data): data = data.strip() + if not data: + return + if self.in_heading: self.current_heading = data + if self.current_heading=="Staff": + self.current_heading = "Stave" self.headings.append(self.current_heading) - elif self.in_items_table and self.in_cell and data and not self.ignore_row: - if self.column==2: - self.current_item = self.create_item(data, self.current_heading) - self.items[data] = self.current_item - elif self.column==3: - self.current_item.droplevel = int(data) - elif self.column>=4: - self.handle_value(self.column, data) + elif self.in_cell: + if self.mod_row: + if self.column==1: + self.mod_names.append(data) + self.mod_index = 0 + elif self.column==2: + if " to " in data: + value = int(data.split(" to ", 1)[1]) + else: + value = int(data) + name = self.mod_names[self.mod_index] + if name!="From Armour Movement Speed +%": + self.current_item.mods[name] = value + self.mod_index += 1 + else: + if self.column==2: + self.current_item = self.create_item(data, self.current_heading) + self.items.append(self.current_item) + elif self.column==3: + self.current_item.droplevel = int(data) + elif self.column>=4: + self.handle_value(self.column, data) def create_item(self, name, kind): pass @@ -127,32 +218,87 @@ class WeaponDataParser(ItemDataParser): elif column==6: self.current_item.dps = float(data) +def get_upgrade_level(item, steps): + level = 0 + for p in item.upgrades.keys(): + upgrade = item + for i in range(steps): + upgrade = upgrade.upgrades.get(p) + if not upgrade: + return 0 + level = max(level, upgrade.droplevel) + return level + def write_best_category(out, prefix, items, steps): best = ["best", "second"] out.write('category "{}.{}_at_level"\n'.format(prefix, best[steps-1])) out.write("{\n\tor\n\t{\n") for it in items: - upgrade = it - for k in range(steps): - upgrade = upgrade.upgrade - if not upgrade: - break + upgrade_level = get_upgrade_level(it, steps) out.write("\t\tand\n\t\t{\n") - if upgrade: + if upgrade_level: out.write('\t\t\tbase_type "{}";\n'.format(it.name)) - out.write("\t\t\titem_level {} {};\n".format(it.droplevel, upgrade.droplevel-1)) + out.write("\t\t\titem_level {} {};\n".format(it.droplevel, upgrade_level-1)) else: out.write('\t\t\tbase_type "{}";\n'.format(it.name)) out.write("\t\t\tmin_item_level {};\n".format(it.droplevel)) out.write("\t\t};\n") out.write("\t};\n};\n") +def print_debug(items): + upgrades = set() + for it in items: + for u in it.upgrades.values(): + if u: + upgrades.add(u.name) + + printed = set() + + queue = [] + while 1: + for it in items: + if it.name not in upgrades and it.name not in printed: + queue.append(it) + break + + if not queue: + break + + while queue: + it = queue.pop(0) + if it.name in printed: + continue + printed.add(it.name) + + print(it.name) + print(" level: {}".format(it.droplevel)) + if isinstance(it, Armor): + print(" armor: {}".format(it.armor)) + print(" evasion: {}".format(it.evasion)) + print(" energy shield: {}".format(it.energy_shield)) + elif isinstance(it, Weapon): + print(" dps: {}".format(it.dps)) + for n, v in it.mods.items(): + print(" {}: {}".format(n, v)) + for t, u in it.upgrades.items(): + if u: + print(" upgrade {}: {}".format(t, u.name)) + if u.name not in printed: + queue.insert(0, u) + def main(): + parser = argparse.ArgumentParser() + parser.add_argument("-g", "--debug", action="store_true", dest="debug") + args = parser.parse_args() + r = requests.get("https://www.pathofexile.com/item-data/armour") p = ArmorDataParser() p.feed(codecs.decode(r.content, r.encoding)) + if args.debug: + print_debug(p.items) + types = {"robe": lambda i: (not i.armor and not i.evasion and i.energy_shield), "cloth": lambda i: (not i.armor and i.evasion and i.energy_shield), "leather": lambda i: (not i.armor and i.evasion and not i.energy_shield), @@ -163,27 +309,10 @@ def main(): out = open("armor.txt", "w") for t, f in types.items(): - items = [i for i in p.items.values() if f(i)] + items = [i for i in p.items if f(i)] for i in range(1, 3): write_best_category(out, "armor.{}".format(t), items, i) - """out.write('category "armor.{}.{}_at_level"\n'.format(t, best[i-1])) - out.write("{\n\tor\n\t{\n") - for it in items: - next_level = 0 - n = it.name - for k in range(i): - n = p.upgrades.get(n, "") - if n: - next_level = p.items[n].droplevel - if next_level: - out.write("\t\tand\n\t\t{\n") - out.write('\t\t\tbase_type "{}";\n'.format(it.name)) - out.write("\t\t\tmax_item_level {};\n".format(next_level-1)) - out.write("\t\t};\n") - else: - out.write('\t\tbase_type "{}";\n'.format(it.name)) - out.write("\t};\n};\n")""" out.write('category "armor.{}"\n'.format(t)) out.write("{\n\tor\n\t{\n") @@ -191,6 +320,13 @@ def main(): out.write('\t\tbase_type "{}";\n'.format(it.name)) out.write("\t};\n};\n") + for b in ("best", "second"): + out.write('category "armor.{}_at_level"\n'.format(b)) + out.write('{\n\tor\n\t{\n') + for t in types.keys(): + out.write('\t\tcategory "armor.{}.{}_at_level";\n'.format(t, b)) + out.write("\t};\n};\n") + out.write('category "armor"\n{\n\tor\n\t{\n') for h in p.headings: out.write('\t\tclass "{}";\n'.format(h)) @@ -200,14 +336,24 @@ def main(): p = WeaponDataParser() p.feed(codecs.decode(r.content, r.encoding)) + if args.debug: + print_debug(p.items) + out = open("weapons.txt", "w") for h in p.headings: - items = [i for i in p.items.values() if i.kind==h] + items = [i for i in p.items if i.kind==h] for i in range(1, 3): write_best_category(out, "weapon.{}".format(h.lower().replace(' ', '_')), items, i) + for b in ("best", "second"): + out.write('category "weapon.{}_at_level"\n'.format(b)) + out.write('{\n\tor\n\t{\n') + for h in p.headings: + out.write('\t\tcategory "weapon.{}.{}_at_level";\n'.format(h.lower().replace(' ', '_'), b)) + out.write("\t};\n};\n") + out.write('category "weapon"\n{\n\tor\n\t{\n') for h in p.headings: out.write('\t\tclass "{}";\n'.format(h))