Discuție Proiect:Comune-Sate/Resurse/Lista comunelor după numărul de sate

Conținutul paginii nu este suportat în alte limbi.
De la Wikipedia, enciclopedia liberă

Am pornit de la script-ul de la Proiect:Cod Wikipedia/WinBatch/Wget download și l-am modificat pentru a putea număra satele din fiecare comună:


rem Choose the infobox field to look for and collect every matching line
rem (case-insensitive) from the index*.* files into 000-grep-result.txt.
rem The double quotes are part of the variable value, so the expanded
rem variable reaches grep as one quoted argument.
rem
rem Alternative pattern. It used to be assigned right before the active one
rem and was overwritten immediately, so it never took effect. To search for
rem the "componenta" field instead, swap it with the active line below:
rem   set PATTERN="componenta *= *"
set PATTERN="sate *= *"
grep -i %PATTERN% index*.* > 000-grep-result.txt

rem Build 000-grep-result-clean1.txt from 000-grep-result.txt.
rem All substitutions run in one sed process, in this order:
rem   1. every underscore becomes a space; the first "action=edit" fragment
rem      (together with its leading ampersand) and the first
rem      "index.php@title=" fragment on each line are removed
rem   2. the 1st, 2nd, ... 45th "]]" on a line are replaced in turn by the
rem      markers _1_ .. _9_, _a_ .. _z_, _A_ .. _J_, so the last marker on a
rem      line tells how many "]]" it contained; any "]]" past the 45th is
rem      left untouched
rem The result is then sorted, de-duplicated, and lines containing
rem "000-pattern" are dropped.
sed -e "s/_/ /g" -e "s/\&action=edit//" -e "s/index.php@title=//" ^
 -e "s/\]\]/_1_/" -e "s/\]\]/_2_/" -e "s/\]\]/_3_/" -e "s/\]\]/_4_/" -e "s/\]\]/_5_/" ^
 -e "s/\]\]/_6_/" -e "s/\]\]/_7_/" -e "s/\]\]/_8_/" -e "s/\]\]/_9_/" -e "s/\]\]/_a_/" ^
 -e "s/\]\]/_b_/" -e "s/\]\]/_c_/" -e "s/\]\]/_d_/" -e "s/\]\]/_e_/" -e "s/\]\]/_f_/" ^
 -e "s/\]\]/_g_/" -e "s/\]\]/_h_/" -e "s/\]\]/_i_/" -e "s/\]\]/_j_/" -e "s/\]\]/_k_/" ^
 -e "s/\]\]/_l_/" -e "s/\]\]/_m_/" -e "s/\]\]/_n_/" -e "s/\]\]/_o_/" -e "s/\]\]/_p_/" ^
 -e "s/\]\]/_q_/" -e "s/\]\]/_r_/" -e "s/\]\]/_s_/" -e "s/\]\]/_t_/" -e "s/\]\]/_u_/" ^
 -e "s/\]\]/_v_/" -e "s/\]\]/_w_/" -e "s/\]\]/_x_/" -e "s/\]\]/_y_/" -e "s/\]\]/_z_/" ^
 -e "s/\]\]/_A_/" -e "s/\]\]/_B_/" -e "s/\]\]/_C_/" -e "s/\]\]/_D_/" -e "s/\]\]/_E_/" ^
 -e "s/\]\]/_F_/" -e "s/\]\]/_G_/" -e "s/\]\]/_H_/" -e "s/\]\]/_I_/" -e "s/\]\]/_J_/" ^
 000-grep-result.txt ^
 | sort | uniq | grep -v "000-pattern" > 000-grep-result-clean1.txt
rem Build 000-grep-result-clean2.txt from 000-grep-result-clean1.txt.
rem First rule: on a line holding at least one _X_ marker after a colon,
rem everything from the first such colon to the end of the line is replaced
rem by the colon plus the LAST marker (the leading .* is greedy).
rem Second rule: removes the first "componenta =" field prefix including its
rem leading vertical bar, which is written as \x7c in the expression.
sed -e "s/:.*_\(.\)_.*/:_\1_/" -e "s/\x7c *componenta *= *//" 000-grep-result-clean1.txt > 000-grep-result-clean2.txt

rem Build 000-grep-result-clean3.txt from 000-grep-result-clean2.txt by turning
rem the count markers into decimal numbers. Each rule touches only the first
rem occurrence on a line, and the rules run in the order written:
rem   _a_ .. _z_  become  10 .. 35
rem   _A_ .. _J_  become  36 .. 45
rem   finally any remaining one-character marker _c_ is replaced by the
rem   character c itself, so _1_ .. _9_ become 1 .. 9
sed ^
 -e "s/_a_/10/" -e "s/_b_/11/" -e "s/_c_/12/" -e "s/_d_/13/" ^
 -e "s/_e_/14/" -e "s/_f_/15/" -e "s/_g_/16/" -e "s/_h_/17/" ^
 -e "s/_i_/18/" -e "s/_j_/19/" -e "s/_k_/20/" -e "s/_l_/21/" ^
 -e "s/_m_/22/" -e "s/_n_/23/" -e "s/_o_/24/" -e "s/_p_/25/" ^
 -e "s/_q_/26/" -e "s/_r_/27/" -e "s/_s_/28/" -e "s/_t_/29/" ^
 -e "s/_u_/30/" -e "s/_v_/31/" -e "s/_w_/32/" -e "s/_x_/33/" ^
 -e "s/_y_/34/" -e "s/_z_/35/" -e "s/_A_/36/" -e "s/_B_/37/" ^
 -e "s/_C_/38/" -e "s/_D_/39/" -e "s/_E_/40/" -e "s/_F_/41/" ^
 -e "s/_G_/42/" -e "s/_H_/43/" -e "s/_I_/44/" -e "s/_J_/45/" ^
 -e "s/_\(.\)_/\1/" ^
 000-grep-result-clean2.txt > 000-grep-result-clean3.txt

rem Build 000-grep-result-in.txt: the output of echo1 for the escape sequence
rem \xEF\xBB\xBF (the three bytes of the UTF-8 byte order mark) followed by
rem the contents of 000-grep-result-clean3.txt.
rem NOTE(review): echo1 is not defined in this script; presumably a GNU echo
rem executable renamed so it does not clash with the cmd builtin - confirm.
(
  echo1 -ne \xEF\xBB\xBF
  cat 000-grep-result-clean3.txt
) > 000-grep-result-in.txt

rem Build 000-grep-result-clean-diacritics.txt from 000-grep-result-in.txt.
rem \x25 is the percent sign, so every pattern matches a raw lead byte
rem followed by a literal percent-encoded second byte, and every replacement
rem writes the two raw bytes instead. All occurrences on a line are replaced.
rem NOTE(review): the percent sign is presumably spelled \x25 to keep cmd
rem from treating it as variable syntax - confirm.
rem The resulting byte pairs are the UTF-8 encodings of, lower then upper case:
rem   c3 ae / c3 8e   i with circumflex
rem   c3 a2 / c3 82   a with circumflex
rem   c4 83 / c4 82   a with breve
rem   c8 99 / c8 98   s with comma below
rem   c8 9b / c8 9a   t with comma below
rem   c5 9f / c5 9e   s with cedilla
rem   c5 a3 / c5 a2   t with cedilla
sed ^
 -e "s/\xc3\x25AE/\xc3\xAE/g" -e "s/\xc3\x258E/\xc3\x8E/g" ^
 -e "s/\xc3\x25A2/\xc3\xA2/g" -e "s/\xc3\x2582/\xc3\x82/g" ^
 -e "s/\xc4\x2583/\xc4\x83/g" -e "s/\xc4\x2582/\xc4\x82/g" ^
 -e "s/\xc8\x2599/\xc8\x99/g" -e "s/\xc8\x2598/\xc8\x98/g" ^
 -e "s/\xc8\x259B/\xc8\x9B/g" -e "s/\xc8\x259A/\xc8\x9A/g" ^
 -e "s/\xc5\x259F/\xc5\x9f/g" -e "s/\xc5\x259E/\xc5\x9e/g" ^
 -e "s/\xc5\x25A3/\xc5\xa3/g" -e "s/\xc5\x25A2/\xc5\xa2/g" ^
 000-grep-result-in.txt > 000-grep-result-clean-diacritics.txt

rem Build 000-grep-result-sorted.txt: the lines of
rem 000-grep-result-clean-diacritics.txt ordered numerically by the text after
rem the first colon, with a trailing " de sate" or " sate" removed.
rem
rem Two fixes compared with the earlier one-line version:
rem   1. " de sate" is stripped BEFORE " sate". In the opposite order a line
rem      ending in "20 de sate" was first cut to "20 de", after which the
rem      " de sate" rule could no longer match and " de" was left behind.
rem   2. The input's first line starts with the UTF-8 byte order mark that was
rem      put in front of 000-grep-result-in.txt. Sorting moves that line, so
rem      the mark could end up in the middle of the file. It is stripped
rem      before sorting and written once at the very start of the output.
rem NOTE(review): echo1 and sort1 are not defined in this script; presumably
rem GNU echo and GNU sort renamed to avoid the Windows commands - confirm.
echo1 -ne \xEF\xBB\xBF > 000-grep-result-sorted.txt
sed -e "s/^\xEF\xBB\xBF//" 000-grep-result-clean-diacritics.txt ^
 | sort1 -t ":" -n -k2 ^
 | sed -e "s/ de sate$//" -e "s/ sate$//" >> 000-grep-result-sorted.txt

rem Build 000-grep-result-sorted-all.txt: all lines of the two input files
rem taken together and ordered numerically by the text after the first colon.
rem NOTE(review): neither 000-grep-result-sorted1.txt nor
rem 000-grep-result-sorted2.txt is created by this script; presumably they are
rem renamed copies of 000-grep-result-sorted.txt from separate runs, one per
rem search pattern - confirm.
sort1 -t ":" -n -k2 000-grep-result-sorted1.txt 000-grep-result-sorted2.txt > 000-grep-result-sorted-all.txt