git.tdb.fi Git - poefilter.git/blobdiff - scrape-item-data.py
Correctly merge icon and light beam appearances
[poefilter.git] / scrape-item-data.py
index 3174a270c903491d7adbf6362c1d64cf9647a0e3..84def9232b6cf2ddaedc1e3353ed00e17d82f58a 100755 (executable)
@@ -3,6 +3,7 @@
 import requests
 import html.parser
 import codecs
+import argparse
 
 class Item:
        def __init__(self, name, kind):
@@ -37,14 +38,15 @@ class Item:
                return 0
 
        def check_upgrade(self, other):
-               if self.droplevel<=other.droplevel:
-                       return
                if not self.is_compatible(other):
                        return
 
                mods = self.compare_mods(other)
                attrs = self.compare_attrs(other)
 
+               if self.droplevel<other.droplevel and (mods<0 or attrs<0):
+                       return
+
                if not other.upgrades["mods"]:
                        if mods>0 or (mods==0 and attrs>0):
                                other.upgrades["mods"] = self
@@ -109,7 +111,7 @@ class ItemDataParser(html.parser.HTMLParser):
                self.in_items_table = False
                self.column = 0
                self.in_cell = False
-               self.items = {}
+               self.items = []
                self.current_item = None
                self.in_heading = False
                self.headings = []
@@ -134,7 +136,8 @@ class ItemDataParser(html.parser.HTMLParser):
                        self.mod_names = []
                elif tag=="td":
                        self.column += 1
-                       self.in_cell = True
+                       if self.in_items_table:
+                               self.in_cell = True
                elif tag=="h1":
                        self.in_heading = True
 
@@ -145,10 +148,14 @@ class ItemDataParser(html.parser.HTMLParser):
                        self.in_cell = False
                elif tag=="tr":
                        if self.current_item and self.mod_row:
-                               for it in self.items.values():
+                               for it in self.items:
                                        self.current_item.check_upgrade(it)
                elif tag=="h1":
                        self.in_heading = False
+               elif tag=="html":
+                       for i in range(1, len(self.items)):
+                               for j in range(i):
+                                       self.items[j].check_upgrade(self.items[i])
 
        def handle_data(self, data):
                data = data.strip()
@@ -157,8 +164,10 @@ class ItemDataParser(html.parser.HTMLParser):
 
                if self.in_heading:
                        self.current_heading = data
+                       if self.current_heading=="Staff":
+                               self.current_heading = "Stave"
                        self.headings.append(self.current_heading)
-               elif self.in_items_table and self.in_cell:
+               elif self.in_cell:
                        if self.mod_row:
                                if self.column==1:
                                        self.mod_names.append(data)
@@ -175,7 +184,7 @@ class ItemDataParser(html.parser.HTMLParser):
                        else:
                                if self.column==2:
                                        self.current_item = self.create_item(data, self.current_heading)
-                                       self.items[data] = self.current_item
+                                       self.items.append(self.current_item)
                                elif self.column==3:
                                        self.current_item.droplevel = int(data)
                                elif self.column>=4:
@@ -237,11 +246,59 @@ def write_best_category(out, prefix, items, steps):
                out.write("\t\t};\n")
        out.write("\t};\n};\n")
 
+def print_debug(items):  # Print each item with its level, stats, mods and upgrade links to stdout.
+       upgrades = set()  # names of items that are the upgrade target of at least one item
+       for it in items:
+               for u in it.upgrades.values():
+                       if u:  # empty upgrade slots are skipped
+                               upgrades.add(u.name)
+
+       printed = set()  # names already printed, so that no item is printed twice
+
+       queue = []  # pending items; consumed from the front
+       while 1:  # one pass per chain start, until none is left
+               for it in items:
+                       if it.name not in upgrades and it.name not in printed:  # not an upgrade of anything and not printed yet
+                               queue.append(it)
+                               break  # only one starting item per pass
+
+               if not queue:  # no unprinted starting item remains
+                       break
+
+               while queue:
+                       it = queue.pop(0)  # take the item at the front
+                       if it.name in printed:  # may have been queued more than once
+                               continue
+                       printed.add(it.name)
+
+                       print(it.name)
+                       print("  level: {}".format(it.droplevel))
+                       if isinstance(it, Armor):  # Armor and Weapon are defined outside this excerpt
+                               print("  armor: {}".format(it.armor))
+                               print("  evasion: {}".format(it.evasion))
+                               print("  energy shield: {}".format(it.energy_shield))
+                       elif isinstance(it, Weapon):
+                               print("  dps: {}".format(it.dps))
+                       for n, v in it.mods.items():  # one output line per mod name/value pair
+                               print("  {}: {}".format(n, v))
+                       for t, u in it.upgrades.items():  # t is the upgrade slot key, u the upgrade item (or a falsy value)
+                               if u:
+                                       print("  upgrade {}: {}".format(t, u.name))
+                                       if u.name not in printed:
+                                               queue.insert(0, u)  # put at the front so the upgrade is handled before other queued items
+
 def main():
+       parser = argparse.ArgumentParser()
+       parser.add_argument("-g", "--debug", action="store_true", dest="debug")
+       args = parser.parse_args()
+
        r = requests.get("https://www.pathofexile.com/item-data/armour")
        p = ArmorDataParser()
        p.feed(codecs.decode(r.content, r.encoding))
 
+       if args.debug:
+               print_debug(p.items)
+
        types = {"robe": lambda i: (not i.armor and not i.evasion and i.energy_shield),
                "cloth": lambda i: (not i.armor and i.evasion and i.energy_shield),
                "leather": lambda i: (not i.armor and i.evasion and not i.energy_shield),
@@ -252,27 +309,10 @@ def main():
        out = open("armor.txt", "w")
 
        for t, f in types.items():
-               items = [i for i in p.items.values() if f(i)]
+               items = [i for i in p.items if f(i)]
 
                for i in range(1, 3):
                        write_best_category(out, "armor.{}".format(t), items, i)
-                       """out.write('category "armor.{}.{}_at_level"\n'.format(t, best[i-1]))
-                       out.write("{\n\tor\n\t{\n")
-                       for it in items:
-                               next_level = 0
-                               n = it.name
-                               for k in range(i):
-                                       n = p.upgrades.get(n, "")
-                               if n:
-                                       next_level = p.items[n].droplevel
-                               if next_level:
-                                       out.write("\t\tand\n\t\t{\n")
-                                       out.write('\t\t\tbase_type "{}";\n'.format(it.name))
-                                       out.write("\t\t\tmax_item_level {};\n".format(next_level-1))
-                                       out.write("\t\t};\n")
-                               else:
-                                       out.write('\t\tbase_type "{}";\n'.format(it.name))
-                       out.write("\t};\n};\n")"""
 
                out.write('category "armor.{}"\n'.format(t))
                out.write("{\n\tor\n\t{\n")
@@ -280,6 +320,13 @@ def main():
                        out.write('\t\tbase_type "{}";\n'.format(it.name))
                out.write("\t};\n};\n")
 
+       for b in ("best", "second"):
+               out.write('category "armor.{}_at_level"\n'.format(b))
+               out.write('{\n\tor\n\t{\n')
+               for t in types.keys():
+                       out.write('\t\tcategory "armor.{}.{}_at_level";\n'.format(t, b))
+               out.write("\t};\n};\n")
+
        out.write('category "armor"\n{\n\tor\n\t{\n')
        for h in p.headings:
                out.write('\t\tclass "{}";\n'.format(h))
@@ -289,14 +336,24 @@ def main():
        p = WeaponDataParser()
        p.feed(codecs.decode(r.content, r.encoding))
 
+       if args.debug:
+               print_debug(p.items)
+
        out = open("weapons.txt", "w")
 
        for h in p.headings:
-               items = [i for i in p.items.values() if i.kind==h]
+               items = [i for i in p.items if i.kind==h]
 
                for i in range(1, 3):
                        write_best_category(out, "weapon.{}".format(h.lower().replace(' ', '_')), items, i)
 
+       for b in ("best", "second"):
+               out.write('category "weapon.{}_at_level"\n'.format(b))
+               out.write('{\n\tor\n\t{\n')
+               for h in p.headings:
+                       out.write('\t\tcategory "weapon.{}.{}_at_level";\n'.format(h.lower().replace(' ', '_'), b))
+               out.write("\t};\n};\n")
+
        out.write('category "weapon"\n{\n\tor\n\t{\n')
        for h in p.headings:
                out.write('\t\tclass "{}";\n'.format(h))