main.py 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168
  1. #!/bin/python3
# Rough-and-ready script that parses Super U till receipts (PDF files).
# Requires an SQLite database with the following structure:
  4. '''
  5. CREATE TABLE "achats" (
  6. "id" INTEGER,
  7. "associated_id" INTEGER,
  8. "quantite" INTEGER NOT NULL,
  9. "article" TEXT NOT NULL,
  10. "categorie" TEXT,
  11. "prix" INTEGER NOT NULL,
  12. "date" TEXT NOT NULL,
  13. "heure" TEXT NOT NULL,
  14. "ville" TEXT NOT NULL,
  15. PRIMARY KEY("id" AUTOINCREMENT)
  16. );
  17. '''
  18. import sys
  19. import os
  20. import PyPDF2
  21. import sqlite3
  22. def parsePDF(f):
  23. with open(f, 'rb') as f:
  24. reader = PyPDF2.PdfReader(f)
  25. contents = reader.getPage(0).extractText().split('\n')
  26. # Detecte si c'est un ticket de caisse ou ticket client
  27. if ("CARTE BANCAIRE" in contents[4]) or ("CARTE BANCAIRE" in contents[1]):
  28. print('Pattern found : don\'t look like a good ticket. Next one...\n')
  29. return ''
  30. else:
  31. print('No pattern found : seems to be a good one. Parsing it...')
  32. #print(contents)
  33. x = 0
  34. for i in contents:
  35. print("[" + str(x) + "] " + contents[x])
  36. x=x+1
  37. if "===========" in contents[x]:
  38. print("matching with END pattern")
  39. return contents[3:x]
  40. elif "-----------" in contents[x]:
  41. print("matching with END pattern")
  42. return contents[3:x]
  43. else:
  44. pass
  45. def parseArticles(content):
  46. if len(content) == 0:
  47. return ""
  48. x = 0
  49. for i in content:
  50. #print("[" + str(x) + "] " + content[x])
  51. x=x+1
  52. ville = content[2]
  53. info = content[12].split()
  54. date = content[12].split()[2]
  55. heure = content[12].split()[3]
  56. article = None
  57. z=-1
  58. for line in content[14:]:
  59. z=z+1
  60. print(line)
  61. if ">>>>" in line:
  62. categorie = line.replace('>', '')[2:]
  63. elif "Pourcentage" in line:
  64. continue
  65. elif not "€" in line:
  66. article = line.split(" ")[0][1:]
  67. continue
  68. elif "€" in line:
  69. #
  70. # Si article pas nul, c'est qu'on est sur un multi ligne
  71. #
  72. if article != None:
  73. print(line.split())
  74. if (" x " in line) and ("€/kg" in line) and (" kg " in line):
  75. quantite = line.split()[0]
  76. prix = line.split()[3] + " €"
  77. if (" x " in line) and ("€" in line) and (not "€/kg" in line):
  78. quantite = line.split()[0]
  79. prix = line.split()[2] + " €"
  80. else:
  81. print("AAAAAAAAAA")
  82. print(line.split(" "))
  83. # On à une ligne type poid avec ou sans prix... les relous quoi !
  84. # Nom d'article, mais le reste est ligne du dessous
  85. # donc on sort de la boucle quand on à notre variable
  86. article = line.split(" ")[0][1:]
  87. quantite = 1
  88. prix = line.split(" ")[len(line.split(" ")) - 2]
  89. print("on sort")
  90. # On pousse la requête !
  91. if (article != None) and (prix != None) and (quantite != None):
  92. print("=> Date / Heure : " + date + " " + heure)
  93. print("=> Catégorie : " + categorie)
  94. print("=> Article : " + article)
  95. print("=> Quantité/poid : " + str(quantite))
  96. print("=> Prix : " + prix)
  97. print("\n")
  98. sql = ''' INSERT INTO achats(quantite,article,categorie,prix,date,heure,ville)
  99. VALUES(?,?,?,?,?,?,?) '''
  100. cur = conn.cursor()
  101. cur.execute(sql, (str(quantite), article, categorie, prix, date, heure, ville ))
  102. conn.commit()
  103. article = None
  104. prix = None
  105. quantite = None
  106. # Comme on est des cochons et que l'algo est trop mauvais, on peut avoir des erreurs de parsing
  107. # on va donc clean les entrées qui ont des valeurs nulles.
  108. # elif (article != None) and (prix == "") and (quantite != None):
  109. # continue
  110. def create_connection(db_file):
  111. conn = None
  112. try:
  113. conn = sqlite3.connect(db_file)
  114. except Error as e:
  115. print(e)
  116. return conn
  117. def ajout_article(conn, project):
  118. sql = ''' INSERT INTO achats(name,begin_date,end_date)
  119. VALUES(?,?,?) '''
  120. cur = conn.cursor()
  121. cur.execute(sql, project)
  122. conn.commit()
  123. return cur.lastrowid
  124. if __name__ == '__main__':
  125. if len(sys.argv) != 2:
  126. print('Erreur')
  127. exit(1)
  128. print(f'Script name is {sys.argv[0]}')
  129. print(f'Path with PDF files is {sys.argv[1]}')
  130. # to store files in a list
  131. list = []
  132. conn = create_connection("database")
  133. # dirs=directories
  134. for (root, dirs, file) in os.walk(sys.argv[1]):
  135. for f in file:
  136. if '.pdf' in f:
  137. print('=> Play with file : ' + f)
  138. parseArticles(parsePDF(sys.argv[1]+"/"+f))
  139. conn.close()
  140. print("terminé")