Buongiorno a tutti, stavo provando a risolvere camera dei segreti con fft ma già dal 3 subtask inizia ad andare in tle. la mia idea era calcolare p(x) come la produttoria di (x+Ri) per i=0,i<N, e poi sostituire a x Bi, e moltiplicando ans per p(Bi). tuttavia il codice è molto lento. qualche consiglio?
Ecco il codice, sto usando l’fft che si trova su usaco guide.
#include <bits/stdc++.h>
using namespace std;
constexpr int MOD=104857601;
using ll=long long;
namespace USACO {
using db = double; // or double, if TL is tight
using str = string; // yay python!
using vl = vector<ll>;
using vi = vector<int>;
#define tcT template <class T
#define tcTU tcT, class U
tcT > using V = vector<T>;
tcT, size_t SZ > using AR = array<T, SZ>;
tcT > using PR = pair<T, T>;
// pairs
#define mp make_pair
#define f first
#define s second
#define sz(x) int((x).size())
// loops
#define FOR(i, a, b) for (int i = (a); i < (b); ++i)
#define F0R(i, a) FOR(i, 0, a)
#define ROF(i, a, b) for (int i = (b)-1; i >= (a); --i)
#define R0F(i, a) ROF(i, 0, a)
#define each(a, x) for (auto &a : x)
// INPUT
#define tcTUU tcT, class... U
tcT > void re(T &x) { cin >> x; }
tcTUU > void re(T &t, U &...u) {
re(t);
re(u...);
}
tcT > void re(V<T> &x) { each(a, x) re(a); }
void setPrec() { cout << fixed << setprecision(15); }
void unsyncIO() { cin.tie(0)->sync_with_stdio(0); }
void setIO() {
unsyncIO();
setPrec();
}
#define rep(i, a, b) for (int i = a; i < (b); ++i)
typedef pair<int, int> pii;
typedef complex<double> C;
void fft(vector<C> &a) {
int n = sz(a), L = 31 - __builtin_clz(n);
static vector<complex<long double>> R(2, 1);
static vector<C> rt(2, 1); // (^ 10% faster if double)
for (static int k = 2; k < n; k *= 2) {
R.resize(n);
rt.resize(n);
auto x = polar(1.0L, acos(-1.0L) / k);
rep(i, k, 2 * k) rt[i] = R[i] = i & 1 ? R[i / 2] * x : R[i / 2];
}
vi rev(n);
rep(i, 0, n) rev[i] = (rev[i / 2] | (i & 1) << L) / 2;
rep(i, 0, n) if (i < rev[i]) swap(a[i], a[rev[i]]);
for (int k = 1; k < n; k *= 2)
for (int i = 0; i < n; i += 2 * k) rep(j, 0, k) {
// C z = rt[j+k] * a[i+j+k]; // (25% faster if hand-rolled) ///
// include-line
auto x = (double *)&rt[j + k],
y = (double *)&a[i + j + k]; /// exclude-line
C z(x[0] * y[0] - x[1] * y[1],
x[0] * y[1] + x[1] * y[0]); /// exclude-line
a[i + j + k] = a[i + j] - z;
a[i + j] += z;
}
}
typedef vector<ll> vl;
vl convMod(const vl &a, const vl &b) {
if (a.empty() || b.empty()) return {};
vl res(sz(a) + sz(b) - 1);
int B = 32 - __builtin_clz(sz(res)), n = 1 << B, cut = int(sqrt(MOD));
vector<C> L(n), R(n), outs(n), outl(n);
rep(i, 0, sz(a)) L[i] = C((int)a[i] / cut, (int)a[i] % cut);
rep(i, 0, sz(b)) R[i] = C((int)b[i] / cut, (int)b[i] % cut);
fft(L), fft(R);
rep(i, 0, n) {
int j = -i & (n - 1);
outl[j] = (L[i] + conj(L[j])) * R[i] / (2.0 * n);
outs[j] = (L[i] - conj(L[j])) * R[i] / (2.0 * n) / 1i;
}
fft(outl), fft(outs);
rep(i, 0, sz(res)) {
ll av = ll(real(outl[i]) + .5), cv = ll(imag(outs[i]) + .5);
ll bv = ll(imag(outl[i]) + .5) + ll(real(outs[i]) + .5);
res[i] = ((av % MOD * cut + bv) % MOD * cut + cv) % MOD;
}
return res;
}
}
int solve(int N,vector<int>a,vector<int>b){
vector<ll>A={a[0],1};
vector<ll>B={a[1],1};
vector<ll>res=USACO::convMod(A,B);
for(int i=2;i<N;i++){
A={a[i],1};
res=USACO::convMod(res,A);
}
ll ans=1;
for(int i=0;i<N;i++){
ll temp=0;
ll power=b[i]%MOD;
for(int j=0;j<res.size();j++){
if(j>0){
temp=(temp+(res[j]*power)%MOD)%MOD;
power=(power*b[i])%MOD;
}
else
temp=(temp+res[j])%MOD;
}
ans=(ans*temp)%MOD;
}
return ans;
}