]> git.tdb.fi Git - poefilter.git/blob - scrape-item-data.py
Allow range conditions to be merged within and conditions
[poefilter.git] / scrape-item-data.py
1 #!/usr/bin/python3
2
3 import requests
4 import html.parser
5 import codecs
6
class Item:
        """A named item of a given kind, with a drop level and an optional upgrade link."""

        def __init__(self, name, kind):
                self.name, self.kind = name, kind
                # Defaults; expected to be overwritten after construction.
                self.droplevel = 0
                self.upgrade = None

        def is_upgrade_for(self, other):
                """Return True if this item has the same kind as *other* and a higher droplevel."""
                if self.kind != other.kind:
                        return False
                return self.droplevel > other.droplevel
16
class Armor(Item):
        """Item that additionally carries armor, evasion and energy shield values."""

        def __init__(self, name, kind):
                super().__init__(name, kind)

                self.armor = self.evasion = self.energy_shield = 0

        def is_upgrade_for(self, other):
                """Return True if this armor upgrades *other*.

                Both items must have the same set of non-zero defence stats; once
                that holds, the decision is delegated to the base class check.
                """
                for stat in ("armor", "evasion", "energy_shield"):
                        mine_present = getattr(self, stat) != 0
                        theirs_present = getattr(other, stat) != 0
                        if mine_present != theirs_present:
                                return False

                return super().is_upgrade_for(other)
34
class Weapon(Item):
        """Item that additionally carries speed and dps values."""

        def __init__(self, name, kind):
                super().__init__(name, kind)

                # Both start at zero until real values are assigned.
                self.speed, self.dps = 0, 0
41
class ItemDataParser(html.parser.HTMLParser):
        """Base HTML parser that collects items from item data tables.

        Text inside <h1> elements is recorded in `headings` and becomes the kind
        passed to create_item() for the items that follow.  Inside a
        <table class="itemDataTable">, each row may contribute one item: the
        text of cell 2 is the item name, cell 3 is the drop level, and the text
        of cells 4 and up is handed to handle_value().  Rows whose class
        attribute ends in "_mod" are skipped.

        When a row ends, the row's item is linked as the `upgrade` of the first
        previously collected item that has no upgrade yet and for which
        is_upgrade_for() returns true.

        Subclasses must override create_item() and may override handle_value().
        """

        def __init__(self):
                super(ItemDataParser, self).__init__()

                self.in_items_table = False
                self.column = 0
                self.in_cell = False
                self.ignore_row = False
                self.items = {}
                self.current_item = None
                self.in_heading = False
                self.headings = []
                self.current_heading = None

        def handle_starttag(self, tag, attrs):
                """Track entry into tables, rows, cells and headings."""
                if tag == "table":
                        for n, v in attrs:
                                if n == "class" and v == "itemDataTable":
                                        self.in_items_table = True
                elif tag == "tr":
                        # HTMLParser reports a valueless attribute (<tr class>) with
                        # v=None, so test v before calling a str method on it.
                        self.ignore_row = any(
                                n == "class" and v and v.endswith("_mod")
                                for n, v in attrs)
                        self.column = 0
                        self.current_item = None
                elif tag == "td":
                        self.column += 1
                        self.in_cell = True
                elif tag == "h1":
                        self.in_heading = True

        def handle_endtag(self, tag):
                """Track exit from elements and link upgrades when a row ends."""
                if tag == "table":
                        self.in_items_table = False
                elif tag == "td":
                        self.in_cell = False
                elif tag == "tr":
                        if self.current_item:
                                # Attach the finished item to the first earlier item that
                                # still lacks an upgrade and accepts this one.
                                for it in self.items.values():
                                        if not it.upgrade and self.current_item.is_upgrade_for(it):
                                                it.upgrade = self.current_item
                                                break
                elif tag == "h1":
                        self.in_heading = False

        def handle_data(self, data):
                """Record heading text, or store cell text on the current item."""
                data = data.strip()
                if self.in_heading:
                        # Whitespace-only text must not register an empty heading or
                        # clobber the heading currently in effect.
                        if data:
                                self.current_heading = data
                                self.headings.append(self.current_heading)
                        return

                if not (self.in_items_table and self.in_cell and data) or self.ignore_row:
                        return

                if self.column == 2:
                        self.current_item = self.create_item(data, self.current_heading)
                        self.items[data] = self.current_item
                elif self.current_item is None:
                        # No name cell has been seen in this row, so there is no item
                        # to attach the value to.
                        return
                elif self.column == 3:
                        self.current_item.droplevel = int(data)
                elif self.column >= 4:
                        self.handle_value(self.column, data)

        def create_item(self, name, kind):
                """Return a new item object for *name* of the given *kind*.

                Must be overridden; the base class has no item type to create.
                """
                raise NotImplementedError("subclasses must implement create_item()")

        def handle_value(self, column, data):
                """Hook for the text of cell *column* (4 or higher); ignored by default."""
                pass
107
class ArmorDataParser(ItemDataParser):
        """Item data parser that produces Armor items."""

        def create_item(self, name, kind):
                """Build the Armor object for a newly seen row."""
                return Armor(name, kind)

        def handle_value(self, column, data):
                """Store integer cell text: column 4 armor, 5 evasion, 6 energy shield."""
                stat = {4: "armor", 5: "evasion", 6: "energy_shield"}.get(column)
                if stat is None:
                        # Any other column carries nothing this parser records.
                        return
                value = int(data)
                setattr(self.current_item, stat, value)
119
class WeaponDataParser(ItemDataParser):
        """Item data parser that produces Weapon items."""

        def create_item(self, name, kind):
                """Build the Weapon object for a newly seen row."""
                return Weapon(name, kind)

        def handle_value(self, column, data):
                """Store float cell text: column 5 is speed, column 6 is dps."""
                if column not in (5, 6):
                        # Other columns are not recorded for weapons.
                        return
                value = float(data)
                if column == 5:
                        self.current_item.speed = value
                else:
                        self.current_item.dps = value
129
def write_best_category(out, prefix, items, steps):
        """Write a "<prefix>.best_at_level" or "<prefix>.second_at_level" category.

        out    -- object with a write() method receiving the text
        prefix -- category name prefix
        items  -- iterable of items with name, droplevel and upgrade attributes
        steps  -- 1 selects "best", 2 selects "second"; also the number of
                  upgrade links followed to find where an item's level range ends

        Each item gets an "and" block with its base type.  If an item is found
        `steps` upgrades further along, the level range runs from the item's
        droplevel to one below that item's droplevel; otherwise only a minimum
        level is written.
        """
        rank = ("best", "second")[steps - 1]

        out.write('category "{}.{}_at_level"\n'.format(prefix, rank))
        out.write("{\n\tor\n\t{\n")
        for item in items:
                # Follow the upgrade chain, giving up as soon as it runs out.
                successor = item
                for _ in range(steps):
                        successor = successor.upgrade
                        if not successor:
                                break

                out.write("\t\tand\n\t\t{\n")
                out.write('\t\t\tbase_type "{}";\n'.format(item.name))
                if successor:
                        top_level = successor.droplevel - 1
                        out.write("\t\t\titem_level {} {};\n".format(item.droplevel, top_level))
                else:
                        out.write("\t\t\tmin_item_level {};\n".format(item.droplevel))
                out.write("\t\t};\n")
        out.write("\t};\n};\n")
150
def _fetch_page(url):
        """Download *url* and return its body decoded to text.

        Raises requests.HTTPError for a 4xx/5xx response.
        """
        r = requests.get(url)
        # Stop on an error response rather than parsing an error page as item data.
        r.raise_for_status()
        # r.encoding is None when the response declares no charset, which
        # codecs.decode() rejects; fall back to UTF-8 in that case.
        return codecs.decode(r.content, r.encoding or "utf-8")

def _write_or_category(out, name, condition, values):
        """Write a category *name* that or-combines one *condition* line per value."""
        out.write('category "{}"\n'.format(name))
        out.write("{\n\tor\n\t{\n")
        for v in values:
                out.write('\t\t{} "{}";\n'.format(condition, v))
        out.write("\t};\n};\n")

def main():
        """Scrape the armour and weapon item data pages into armor.txt and weapons.txt.

        armor.txt receives, for every armor type, the best/second "at level"
        categories and a plain base type category, followed by an "armor"
        category listing the page headings as classes.  weapons.txt receives
        the best/second "at level" categories per page heading, followed by a
        "weapon" category listing the headings as classes.
        """
        p = ArmorDataParser()
        p.feed(_fetch_page("https://www.pathofexile.com/item-data/armour"))

        # Armor types are told apart by which defence stats are non-zero.
        armor_types = {"robe": lambda i: (not i.armor and not i.evasion and i.energy_shield),
                "cloth": lambda i: (not i.armor and i.evasion and i.energy_shield),
                "leather": lambda i: (not i.armor and i.evasion and not i.energy_shield),
                "scale": lambda i: (i.armor and i.evasion and not i.energy_shield),
                "plate": lambda i: (i.armor and not i.evasion and not i.energy_shield),
                "chain": lambda i: (i.armor and not i.evasion and i.energy_shield)}

        # The with-block guarantees the file is flushed and closed, even on error.
        with open("armor.txt", "w") as out:
                for t, f in armor_types.items():
                        items = [i for i in p.items.values() if f(i)]
                        prefix = "armor.{}".format(t)

                        for steps in range(1, 3):
                                write_best_category(out, prefix, items, steps)

                        _write_or_category(out, prefix, "base_type", [it.name for it in items])

                _write_or_category(out, "armor", "class", p.headings)

        p = WeaponDataParser()
        p.feed(_fetch_page("https://www.pathofexile.com/item-data/weapon"))

        with open("weapons.txt", "w") as out:
                for h in p.headings:
                        items = [i for i in p.items.values() if i.kind==h]
                        prefix = "weapon.{}".format(h.lower().replace(' ', '_'))

                        for steps in range(1, 3):
                                write_best_category(out, prefix, items, steps)

                _write_or_category(out, "weapon", "class", p.headings)
215
# Run the scraper only when this file is executed as a script.
if __name__ == "__main__":
        main()