+import sys
import requests
import html.parser
import codecs
other_aps = other.amount/other.duration
return (aps>other_aps and self.amount>other.amount)
class SkillGem:
    """Plain record describing one skill gem.

    Only ``name`` is supplied at construction time; every other field
    starts at an "unknown" default (``None`` or ``False``) and is expected
    to be assigned afterwards by the code that gathers the data.
    """

    def __init__(self, name):
        self.name = name
        # Attribute names; None until they have been determined.
        self.primary_attribute = None
        self.secondary_attribute = None
        # None means no price has been recorded for this gem.
        self.price = None
        self.vaal = False
        self.drop_only = False

    def __repr__(self):
        return (
            "{}(name={!r}, primary_attribute={!r}, secondary_attribute={!r}, "
            "price={!r}, vaal={!r}, drop_only={!r})"
        ).format(
            type(self).__name__,
            self.name,
            self.primary_attribute,
            self.secondary_attribute,
            self.price,
            self.vaal,
            self.drop_only,
        )
class WikiParser(html.parser.HTMLParser):
    """HTML parser base that tracks item hover-box spans.

    ``self.ignore`` is 0 while the parser is outside any ``<span>`` whose
    ``class`` contains ``c-item-hoverbox__display``.  Entering such a span
    sets it to 1, and every further ``<span>`` opened inside it increments
    the counter, so the matching ``</span>`` tags bring it back to 0.
    Subclasses can test ``self.ignore`` to skip content inside the box.
    """

    def __init__(self):
        super(WikiParser, self).__init__()
        self.ignore = 0

    def handle_starttag(self, tag, attrs):
        """Update the hover-box nesting depth for an opening tag."""
        if tag != "span":
            return
        if self.ignore:
            # Already inside a hover box: count nested spans so the
            # matching end tags unwind correctly.
            self.ignore += 1
            return
        for n, v in attrs:
            # HTMLParser reports valueless attributes (``<span class>``)
            # with a value of None, which cannot be searched with ``in``.
            if n == "class" and v and "c-item-hoverbox__display" in v:
                self.ignore = 1
                break

    def handle_endtag(self, tag):
        """Unwind one level of hover-box nesting on ``</span>``."""
        if tag == "span" and self.ignore:
            self.ignore -= 1
+class WikiTableParser(WikiParser):
+ def __init__(self):
+ super(WikiTableParser, self).__init__()
self.in_items_table = False
self.column = 0
self.in_cell = False
- self.ignore_data = 0
self.items = []
self.current_item = None
def handle_starttag(self, tag, attrs):
+ super(WikiTableParser, self).handle_starttag(tag, attrs)
if tag=="table":
for n, v in attrs:
if n=="class" and "wikitable" in v:
self.column += 1
if self.in_items_table:
self.in_cell = True
- elif tag=="span":
- if self.ignore_data:
- self.ignore_data += 1
- else:
- for n, v in attrs:
- if n=="class" and "c-item-hoverbox__display" in v:
- self.ignore_data = 1
def handle_endtag(self, tag):
+ super(WikiTableParser, self).handle_endtag(tag)
if tag=="table":
self.in_items_table = False
elif tag=="td":
self.in_cell = False
- elif tag=="span":
- if self.ignore_data:
- self.ignore_data -= 1
def handle_data(self, data):
- if self.ignore_data:
+ if self.ignore:
data = data.strip()
def handle_value(self, column, data):
-class DivinationCardsParser(WikiParser):
+class DivinationCardsParser(WikiTableParser):
def handle_starttag(self, tag, attrs):
super(DivinationCardsParser, self).handle_starttag(tag, attrs)
- if tag=="span" and not self.ignore_data:
+ if tag=="span" and not self.ignore:
if self.in_cell and self.current_item and not self.current_item.reward_kind and self.column==3:
for n, v in attrs:
if n=="class":
elif "Map" in data:
self.current_item.reward_kind = "map"
-class FlasksParser(WikiParser):
+class FlasksParser(WikiTableParser):
def __init__(self, utility=False):
super(FlasksParser, self).__init__()
elif (column==4 and not self.utility) or (column==3 and self.utility):
self.current_item.duration = float(data)
class SkillGemListParser(WikiParser):
    """Collect link targets that appear under the "List" heading of a page.

    While the most recent ``<h2>`` text was exactly ``List``, the ``href``
    of every ``<a>`` that is not inside an ignored hover box is appended
    to ``self.links`` in document order.
    """

    def __init__(self):
        super(SkillGemListParser, self).__init__()
        self.in_subheading = False
        self.in_list = False
        self.links = []

    def handle_starttag(self, tag, attrs):
        super(SkillGemListParser, self).handle_starttag(tag, attrs)
        if tag == "h2":
            self.in_subheading = True
        elif tag == "a" and self.in_list and not self.ignore:
            for n, v in attrs:
                # A valueless ``href`` is reported as None; it is not a
                # usable link target, so it is not recorded.
                if n == "href" and v is not None:
                    self.links.append(v)

    def handle_endtag(self, tag):
        super(SkillGemListParser, self).handle_endtag(tag)
        if tag == "h2":
            self.in_subheading = False

    def handle_data(self, data):
        data = data.strip()
        if not data:
            # Whitespace between tags inside a heading must not reset the
            # section flag.
            return
        if self.in_subheading:
            self.in_list = (data == "List")
class SkillGemParser(WikiParser):
    """Parse a single skill gem page into a ``SkillGem`` stored in ``self.item``.

    The parser is a small state machine driven by the tags it sees:

    * the ``<h1>`` text becomes the gem name (text from the first ``(``
      onwards is dropped) and creates ``self.item``;
    * ``<h2>`` text selects the current section (``progression`` or
      ``acquisition``);
    * inside a ``<span>`` whose class contains ``item-box``, an ``<em>``
      with a ``header`` class whose text contains ``Purchase`` starts the
      purchase area, where second-column text is matched against
      ``prices``;
    * in the progression section, ``<img alt=...>`` in column 3 or later
      names an attribute, and row 2 supplies its numeric requirement;
    * reaching ``<div id="footer">`` without any acquisition text marks
      the gem as drop-only.
    """

    # (price key, currency item name exactly as it appears in the page text)
    prices = (("alchemy", "Orb of Alchemy"),
              ("chance", "Orb of Chance"),
              ("alteration", "Orb of Alteration"),
              ("transmute", "Orb of Transmutation"),
              ("wisdom", "Scroll of Wisdom"))

    def __init__(self):
        super(SkillGemParser, self).__init__()
        self.item = None
        self.in_heading = False
        self.in_subheading = False
        # Span nesting depth inside the item box; 0 means outside.
        self.in_infobox = 0
        self.in_box_heading = False
        self.in_purchase = False
        self.in_progression = False
        self.in_acquisition = False
        self.have_acquisition_data = False
        # 1-based position within the table currently being read.
        self.row = 0
        self.column = 0
        # List of [attribute name, requirement value] pairs.
        self.attribute_reqs = []

    def handle_starttag(self, tag, attrs):
        super(SkillGemParser, self).handle_starttag(tag, attrs)
        if tag == "h1":
            self.in_heading = True
        elif tag == "h2":
            self.in_subheading = True
        elif tag == "span":
            if self.in_infobox:
                self.in_infobox += 1
            else:
                for n, v in attrs:
                    # Valueless attributes arrive with v set to None.
                    if n == "class" and v and "item-box" in v:
                        self.in_infobox = 1
        elif tag == "em":
            if self.in_infobox:
                for n, v in attrs:
                    if n == "class" and v and "header" in v:
                        # A new box heading ends any purchase area.
                        self.in_purchase = False
                        self.in_box_heading = True
        elif tag == "table":
            self.row = 0
        elif tag == "tr":
            self.row += 1
            self.column = 0
        elif tag == "td" or tag == "th":
            self.column += 1
        elif tag == "img":
            if self.in_progression and self.column >= 3:
                for n, v in attrs:
                    if n == "alt":
                        self.attribute_reqs.append([v, 0])
        elif tag == "div":
            for n, v in attrs:
                if n == "id" and v == "footer":
                    # No item exists if the page had no <h1> text.
                    if not self.have_acquisition_data and self.item is not None:
                        self.item.drop_only = True

    def handle_endtag(self, tag):
        super(SkillGemParser, self).handle_endtag(tag)
        if tag == "h1":
            self.in_heading = False
        elif tag == "h2":
            self.in_subheading = False
        elif tag == "span":
            if self.in_infobox:
                self.in_infobox -= 1
        elif tag == "em":
            self.in_box_heading = False
        elif tag == "tr":
            if (self.in_progression and self.row == 2
                    and self.attribute_reqs and self.item is not None):
                # NOTE(review): the sort is ascending, so the attribute
                # with the LOWEST row-2 requirement becomes primary.
                # Confirm this is intended rather than a missing
                # reverse=True.
                self.attribute_reqs.sort(key=lambda r: r[1])
                self.item.primary_attribute = self.attribute_reqs[0][0]
                if len(self.attribute_reqs) > 1:
                    self.item.secondary_attribute = self.attribute_reqs[1][0]

    def handle_data(self, data):
        data = data.strip()
        if not data:
            return
        if self.in_heading:
            name = data
            paren = name.find('(')
            if paren >= 0:
                name = name[:paren].strip()
            self.item = SkillGem(name)
            if name.startswith("Vaal"):
                self.item.vaal = True
        elif self.in_subheading:
            self.in_progression = ("progression" in data)
            self.in_acquisition = ("acquisition" in data)
        elif self.in_box_heading:
            if "Purchase" in data:
                self.in_purchase = True
        elif self.in_purchase:
            if self.column == 2 and self.item is not None:
                for p, n in SkillGemParser.prices:
                    if n == data:
                        self.item.price = p
        elif self.in_progression:
            # Columns 3.. line up with attribute_reqs in the order their
            # images were seen.
            if (self.row == 2 and self.column >= 3
                    and self.column < 3 + len(self.attribute_reqs)):
                self.attribute_reqs[self.column - 3][1] = int(data)
        elif self.in_acquisition:
            self.have_acquisition_data = True
def scrape_flasks(out, url, kind):
r = requests.get(url)
p = FlasksParser()
out.write('\t\tbase_type "{}";\n'.format(it.name))
+ r = requests.get("https://pathofexile.gamepedia.com/List_of_skill_gems")
+ p = SkillGemListParser()
+ p.feed(codecs.decode(r.content, r.encoding))
+ gems = []
+ prefix = ""
+ for l in p.links:
+ r2 = requests.get("https://pathofexile.gamepedia.com"+l)
+ p2 = SkillGemParser()
+ p2.feed(codecs.decode(r2.content, r2.encoding))
+ gems.append(p2.item)
+ sys.stdout.write(prefix+"{}\n".format(p2.item.name))
+ prefix = "\033[1A\033[K"
+ sys.stdout.write(prefix)
+ out = open("skillgems.txt", "w")
+ out.write('category "skillgem.vaal"\n{\n\tclass "Skill Gem";\n\tor\n\t{\n')
+ for g in gems:
+ if g.vaal:
+ out.write('\t\tbase_type "{}";\n'.format(g.name))
+ out.write("\t};\n};\n")
+ out.write('category "skillgem.drop"\n{\n\tclass "Skill Gem";\n\tor\n\t{\n')
+ for g in gems:
+ if (g.drop_only or not g.price) and not g.vaal:
+ out.write('\t\tbase_type "{}";\n'.format(g.name))
+ out.write("\t};\n};\n")
+ for p, n in SkillGemParser.prices:
+ out.write('category "skillgem.{}"\n'.format(p))
+ out.write('{\n\tclass "Skill Gem";\n\tor\n\t{\n')
+ for g in gems:
+ if g.price==p:
+ out.write('\t\tbase_type "{}";\n'.format(g.name))
+ out.write("\t};\n};\n")
+ for a in ["strength", "dexterity", "intelligence"]:
+ out.write('category "skillgem.{}"\n'.format(a))
+ out.write('{\n\tclass "Skill Gem";\n\tor\n\t{\n')
+ for g in gems:
+ if g.primary_attribute==a:
+ out.write('\t\tbase_type "{}";\n'.format(g.name))
+ out.write("\t};\n};\n")
if __name__=="__main__":